diff -urpN -X /home/fletch/.diff.exclude 000-virgin/Documentation/filesystems/proc.txt 901-mjb1.1/Documentation/filesystems/proc.txt
--- 000-virgin/Documentation/filesystems/proc.txt	Sat Jun 14 18:37:23 2003
+++ 901-mjb1.1/Documentation/filesystems/proc.txt	Wed Aug 13 20:27:51 2003
@@ -37,6 +37,7 @@ Table of Contents
  2.8	/proc/sys/net/ipv4 - IPV4 settings
  2.9	Appletalk
  2.10	IPX
+ 2.11	/proc/sys/sched - scheduler tunables
 ------------------------------------------------------------------------------
 Preface
@@ -1751,6 +1752,104 @@ IPX.
 The /proc/net/ipx_route table holds a list of IPX routes. For each route it
 gives the destination network, the router node (or Directly) and the network
 address of the router (or Connected) for internal networks.
+
+2.11 /proc/sys/sched - scheduler tunables
+-----------------------------------------
+
+Useful knobs for tuning the scheduler live in /proc/sys/sched.
+
+child_penalty
+-------------
+
+Percentage of the parent's sleep_avg that children inherit. sleep_avg is
+a running average of the time a process spends sleeping. Tasks with high
+sleep_avg values are considered interactive and given a higher dynamic
+priority and a larger timeslice. You typically want to set this to some
+value just under 100.
+
+exit_weight
+-----------
+
+When a CPU hog task exits, its parent's sleep_avg is reduced by a factor of
+exit_weight against the exiting task's sleep_avg.
+
+interactive_delta
+-----------------
+
+If a task is "interactive" it is reinserted into the active array after it
+has expired its timeslice, instead of being inserted into the expired array.
+How "interactive" a task must be in order to be deemed interactive is a
+function of its nice value. This interactive limit is scaled linearly by nice
+value and is offset by the interactive_delta.
+
+max_sleep_avg
+-------------
+
+max_sleep_avg is the largest value (in ms) stored for a task's running sleep
+average. The larger this value, the longer a task needs to sleep to be
+considered interactive (maximum interactive bonus is a function of
+max_sleep_avg).
+
+max_timeslice
+-------------
+
+Maximum timeslice, in milliseconds. This is the value given to tasks of the
+highest dynamic priority.
+
+min_timeslice
+-------------
+
+Minimum timeslice, in milliseconds. This is the value given to tasks of the
+lowest dynamic priority. Every task gets at least this slice of the processor
+per array switch.
+
+parent_penalty
+--------------
+
+Percentage of the parent's sleep_avg that it retains across a fork().
+sleep_avg is a running average of the time a process spends sleeping. Tasks
+with high sleep_avg values are considered interactive and given a higher
+dynamic priority and a larger timeslice. Normally, this value is 100 and thus
+tasks retain their sleep_avg on fork. If you want to punish interactive
+tasks for forking, set this below 100.
+
+prio_bonus_ratio
+----------------
+
+Middle percentage of the priority range that tasks can receive as a dynamic
+priority. The default value of 25% ensures that nice values at the
+extremes are still enforced. For example, nice +19 interactive tasks will
+never be able to preempt a nice 0 CPU hog. Setting this higher will increase
+the size of the priority range the tasks can receive as a bonus. Setting
+this lower will decrease this range, making the interactivity bonus less
+apparent and user nice values more applicable.
+
+starvation_limit
+----------------
+
+Sufficiently interactive tasks are reinserted into the active array when they
+run out of timeslice. Normally, tasks are inserted into the expired array.
+Reinserting interactive tasks into the active array allows them to remain
+runnable, which is important for interactive performance. This could starve
+expired tasks, however, since the interactive task could prevent the array
+switch. To keep the tasks on the expired array from starving for too long,
+starvation_limit is the longest time (in milliseconds) we will let the
+expired array starve before we stop reinserting interactive tasks into the
+active array. Higher values give more preference to running interactive
+tasks, at the expense of expired tasks. Lower values provide fairer
+scheduling behavior, at the expense of interactivity.
+
+idle_node_rebalance_ratio
+-------------------------
+
+On NUMA machines, we normally rebalance within nodes, but we also rebalance
+globally every N idle rebalance ticks, where N = idle_node_rebalance_ratio.
+
+busy_node_rebalance_ratio
+-------------------------
+
+On NUMA machines, we normally rebalance within nodes, but we also rebalance
+globally every N busy rebalance ticks, where N = busy_node_rebalance_ratio.
 ------------------------------------------------------------------------------
 Summary
diff -urpN -X /home/fletch/.diff.exclude 000-virgin/Documentation/i386/gdb-serial.txt 901-mjb1.1/Documentation/i386/gdb-serial.txt
--- 000-virgin/Documentation/i386/gdb-serial.txt	Wed Dec 31 16:00:00 1969
+++ 901-mjb1.1/Documentation/i386/gdb-serial.txt	Wed Aug 13 20:29:29 2003
@@ -0,0 +1,386 @@
+Version
+=======
+
+This version of the gdbstub package was developed and tested on
+kernel version 2.3.48. It will not install on a 2.2 kernel. It may
+not work on earlier versions of 2.3 kernels. It is possible that
+it will continue to work on later versions of 2.3 and then
+versions of 2.4 (I hope).
+
+
+Debugging Setup
+===============
+
+Designate one machine as the "development" machine. This is the
+machine on which you run your compiles and which has your source
+code for the kernel. Designate a second machine as the "target"
+machine. This is the machine that will run your experimental
+kernel.
+
+The two machines will be connected together via a serial line out
+one or the other of the COM ports of the PC. You will need a modem
+eliminator and the appropriate cables.
+
+On the DEVELOPMENT machine you need to apply the patch for the gdb
+hooks. You have probably already done that if you are reading this
+file.
+
+On your DEVELOPMENT machine, go to your kernel source directory and
+do "make menuconfig". Go down to the kernel hacking menu item and
+open it up. Enable the kernel gdb stub code by selecting that item.
+
+Save and exit the menuconfig program. Then do "make clean" and
+"make bzImage" (or whatever target you want to make). This gets
+the kernel compiled with the "-g" option set -- necessary for
+debugging.
+
+You have just built the kernel on your DEVELOPMENT machine that you
+intend to run on your TARGET machine.
+
+To install this new kernel, use the following installation procedure.
+Remember, you are on the DEVELOPMENT machine patching the kernel source
+for the kernel that you intend to run on the TARGET machine.
+
+Copy this kernel to your target machine using your usual procedures.
+I usually arrange to copy development:/usr/src/linux/arch/i386/boot/zImage
+to /vmlinuz on the TARGET machine via a LAN based NFS access. That is,
+I run the cp command on the target and copy from the development machine
+via the LAN.
+Run Lilo on the new kernel on the target machine so that it will boot!
+Then boot the kernel on the target machine.
+
+There is a utility program named "gdbstart" in the
+development:/usr/src/linux/arch/i386/kernel directory.
+You should copy this program over to your target machine, probably into
+/sbin. This utility program is run on the target machine to
+activate the kernel hooks for the debugger. It is invoked as follows:
+
+    gdbstart [-s speed] [-t tty-dev]
+    defaults:  /dev/ttyS0 with speed unmodified by gdbstart
+
+Don't run the program just yet. We'll get to that in a bit.
+
+Decide on which tty port you want the machines to communicate, then
+cable them up back-to-back using the null modem. COM1 is /dev/ttyS0
+and COM2 is /dev/ttyS1.
+
+On the DEVELOPMENT machine, create a file called .gdbinit in the
+directory /usr/src/linux. An example .gdbinit file looks like this:
+
+define rmt
+set remotebaud 38400
+target remote /dev/ttyS0
+end
+
+Assuming that you added my gdbinit stuff to your .gdbinit, edit .gdbinit
+and find the section that looks like this:
+
+    define rmt
+    set remotebaud 38400
+    target remote /dev/ttyS0
+    end
+
+Change the "target" definition so that it specifies the tty port that
+you intend to use. Change the "remotebaud" definition to match the
+data rate that you are going to use for the com line.
+
+On the TARGET machine I find it helpful to create a shell script file
+named "debug" in the root home directory with the following contents:
+
+    gdbstart -s 38400 -t /dev/ttyS0 <<EOF
+EOF
+
+This runs the gdbstart program and gives it the carriage return that
+it prompts for. This sets the data rate from the target machine's side.
+
+You are now ready to try it out.
+
+On your TARGET machine, freshly rebooted with your gdbstub-equipped
+kernel, type "debug" in the root home directory. The system will appear
+to hang with some messages on the screen from the debug stub. What
+it is doing is waiting for contact from the development machine.
+
+On your DEVELOPMENT machine, cd /usr/src/linux and enter "gdb vmlinux".
+When gdb gets the symbols loaded and prompts you, enter "rmt" (that's
+the macro from the .gdbinit file that you just edited). If everything
+is working correctly you should see gdb print out a few lines indicating
+that a breakpoint has been taken. It will actually show a line of
+code in the target kernel inside the gdbstub activation code.
+
+The gdb interaction should look something like this:
+
+    linux-dev:/usr/src/linux# gdb vmlinux
+    GDB is free software and you are welcome to distribute copies of it
+    under certain conditions; type "show copying" to see the conditions.
+    There is absolutely no warranty for GDB; type "show warranty" for details.
+    GDB 4.15.1 (i486-slackware-linux),
+    Copyright 1995 Free Software Foundation, Inc...
+    (gdb) rmt
+    breakpoint () at i386-stub.c:750
+    750     }
+    (gdb)
+
+You can now use whatever gdb commands you like to set breakpoints.
+Enter "continue" to start your target machine executing again. At this
+point the target system will run at full speed until it encounters
+your breakpoint or gets a segment violation in the kernel, or whatever.
+
+
+Triggering gdbstub at Kernel Boot Time
+======================================
+
+The gdbstub patch now has the ability for gdb to connect to the kernel during
+bootup (as opposed to waiting for the system to come all the way up and then
+running the gdbstart program on the target machine). This new functionality was
+added by Scott Foehner at SGI.
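On the development machine nothing changes for boot-time debugging: once the
target pauses (using the boot parameters described in the next paragraph), you
connect with the same gdb session shown above. A minimal, illustrative sketch
only -- the breakpoint is just an example, and the "rmt" macro is assumed to
already name the port and speed the stub is using:

    linux-dev:/usr/src/linux# gdb vmlinux
    (gdb) rmt                  # macro from the .gdbinit shown above
    (gdb) break do_fork        # illustrative; set whatever breakpoints you need
    (gdb) continue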
+ +To force a kernel that has been compiled with gdbstub to pause during the boot +process and wait for a connection from gdb, the paramter "gdb" should be passed +to the kernel. This can be done by typing "gdb" after the name of the kernel +on the LILO command line. The patch defaults to use ttyS1 at a baud rate of +38400. These parameters can be changed by using "gdbttyS=" and +"gdbbaud=" on the command line. + +Example: + +LILO boot: linux gdb gdbttyS=1 gdbbaud=38400 + +Note that this command is entered on the TARGET machine as it is booting +the kernel that was compiled on the DEVELOPMENT machine. + +An alternate approach is to place a line in the /etc/lilo.conf file on +your TARGET machine. Under the heading for the kernel that you intend +to boot, place a line that looks like this: + + append = "gdb gdbttyS=1 gdbbaud=38400" + +This will cause the kernel to enter the gdbstub automatically at boot +time. + +BE SURE to run "lilo" after changing the /etc/lilo.conf file. + + +The "gdbstart" Program +===================== + +This utility program is used to set up the com port and data rate +for the connection from the target system to the development system. +Its usage has been described above. + +This version of the patch uses the same tty ioctl for kernel versions +2.0.30 onwards. Thus, the gdbstart utility does not need to be re-compiled +to install the patch in a later version of the kernel. The ioctl added +to the kernel for this purpose is far enough "off the end" of existing +ioctls (as of 2.1.120) that it should not interfere with any new kernel +tty ioctls for quite some time (famous last words). + +The source for the gdbstart program resides in the arch/i386/kernel directory. + + +Debugging hints +=============== + +You can break into the target machine at any time from the development +machine by typing ^C. If the target machine has interrupts enabled +this will stop it in the kernel and enter the debugger. + +There is unfortunately no way of breaking into the kernel if it is +in a loop with interrupts disabled, so if this happens to you then +you need to place exploratory breakpoints or printk's into the kernel +to find out where it is looping. + +There is a copy of an e-mail in the kgdb distribution directory which +describes how to create an NMI on an ISA bus machine using a paper +clip. I have a sophisticated version of this made by wiring a push +button switch into a PC104/ISA bus adapter card. The adapter card +nicely furnishes wire wrap pins for all the ISA bus signals. + +When you are done debugging the kernel on the target machine it is +a good idea to leave it in a running state. This makes reboots +faster, bypassing the fsck. So do a gdb "continue" as the last gdb +command if this is possible. To terminate gdb itself on the development +machine and leave the target machine running, type ^Z to suspend gdb +and then kill it with "kill %1" or something similar. + +If gdbstub Does Not Work +======================== + +If it doesn't work, you will have to troubleshoot it. Do the easy things +first like double checking your cabling and data rates. You might +try some non-kernel based programs to see if the back-to-back connection +works properly. Just something simple like cat /etc/hosts >/dev/ttyS0 +on one machine and cat /dev/ttyS0 on the other will tell you if you +can send data from one machine to the other. There is no point in tearing +out your hair in the kernel if the line doesn't work. 
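The back-to-back test described above is easier to interpret if both ends are
forced to the same settings first. A small sketch, assuming COM1 (/dev/ttyS0)
on both machines and the 38400 data rate used in the earlier examples --
adjust to your cabling and configuration:

    # on the receiving machine
    stty ispeed 38400 ospeed 38400 min 1 time 0 < /dev/ttyS0
    cat /dev/ttyS0

    # on the sending machine
    stty ispeed 38400 ospeed 38400 < /dev/ttyS0
    cat /etc/hosts > /dev/ttyS0

If the contents of /etc/hosts show up on the receiving machine, the cable and
data rates are fine and the problem is on the kernel side.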
+ +All of the real action takes place in the file +/usr/src/linux/arch/i386/kernel/gdbstub.c. That is the code on the target +machine that interacts with gdb on the development machine. In gdb you can +turn on a debug switch with the following command: + + set remotedebug + +This will print out the protocol messages that gdb is exchanging with +the target machine. + +Another place to look is /usr/src/linux/drivers/char/gdbserial.c +That is the code that talks to the serial port on the target side. +There might be a problem there. + +If you are really desperate you can use printk debugging in the +gdbstub code in the target kernel until you get it working. In particular, +there is a global variable in /usr/src/linux/arch/i386/kernel/gdbstub.c +named "remote_debug". Compile your kernel with this set to 1, rather +than 0 and the debug stub will print out lots of stuff as it does +what it does. + + +Debugging Loadable Modules +========================== + +This technique comes courtesy of Edouard Parmelan + + +When you run gdb, enter the command + +source gdbinit-modules + +This will read in a file of gdb macros that was installed in your +kernel source directory with kgdb was installed. This file implements +the following commands: + +mod-list + Lists the loaded modules in the form + +mod-print-symbols + Prints all the symbols in the indicated module. + +mod-add-symbols + Loads the symbols from the object file and associates them + with the indicated module. + +After you have loaded the module that you want to debug, use the command +mod-list to find the of your module. Then use that +address in the mod-add-symbols command to load your module's symbols. +From that point onward you can debug your module as if it were a part +of the kernel. + +The file gdbinit-modules also contains a command named mod-add-lis as +an example of how to construct a command of your own to load your +favorite module. The idea is to "can" the pathname of the module +in the command so you don't have to type so much. + +Threads +======= + +Each process in a target machine is seen as a gdb thread. gdb thread related +commands (info threads, thread n) can be used. + +ia-32 hardware breakpoints +========================== + +gdb stub contains support for hardware breakpoints using debugging features +of ia-32(x86) processors. These breakpoints do not need code modification. +They use debugging registers. 4 hardware breakpoints are available in ia-32 +processors. + +Each hardware breakpoint can be of one of the following three types. +1. Execution breakpoint - An Execution breakpoint is triggered when code at the + breakpoint address is executed. + + As limited number of hardware breakpoints are available, it is advisable + to use software breakpoints ( break command ) instead of execution + hardware breakpoints, unless modification of code is to be avoided. + +2. Write breakpoint - A write breakpoint is triggered when memory location at the + breakpoint address is written. + + A write or can be placed for data of variable length. Length of a write + breakpoint indicates length of the datatype to be watched. Length is 1 + for 1 byte data , 2 for 2 byte data, 3 for 4 byte data. + +3. Access breakpoint - An access breakpoint is triggered when memory location at + the breakpoint address is either read or written. + + Access breakpoints also have lengths similar to write breakpoints. + +IO breakpoints in ia-32 are not supported. 
+ +Since gdb stub at present does not use the protocol used by gdb for hardware +breakpoints, hardware breakpoints are accessed through gdb macros. gdb macros +for hardware breakpoints are described below. + +hwebrk - Places an execution breakpoint + hwebrk breakpointno address +hwwbrk - Places a write breakpoint + hwwbrk breakpointno length address +hwabrk - Places an access breakpoint + hwabrk breakpointno length address +hwrmbrk - Removes a breakpoint + hwrmbrk breakpointno +exinfo - Tells whether a software or hardware breakpoint has occured. + Prints number of the hardware breakpoint if a hardware breakpoint has + occured. + +Arguments required by these commands are as follows +breakpointno - 0 to 3 +length - 1 to 3 +address - Memory location in hex digits ( without 0x ) e.g c015e9bc + +MP support +========== + +When a breakpoint occurs or user issues a break ( Ctrl + C ) to gdb client, +all the processors are forced to enter the debugger. Current thread +corresponds to the thread running on the processor where breakpoint occured. +Threads running on other processor(s) appear similar to other non running +threads in the 'info threads' output. + +ia-32 hardware debugging registers on all processors are set to same values. +Hence any hardware breakpoints may occur on any processor. + +gdb troubleshooting +=================== + +1. gdb hangs +Kill it. restart gdb. Connect to target machine. + +2. gdb cannot connect to target machine (after killing a gdb and restarting +another) +If the target machine was not inside debugger when you killed gdb, gdb cannot +connect because the target machine won't respond. +In this case echo "Ctrl+C"(ascii 3) in the serial line. +e.g. echo -e "\003" > /dev/ttyS1 +This forces that target machine into debugger after which you can connect. + +3. gdb cannot connect even after echoing Ctrl+C into serial line +Try changing serial line settings min to 1 and time to 0 +e.g. stty min 1 time 0 < /dev/ttyS1 +Try echoing again + +check serial line speed and set it to correct value if required +e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 + +Final Items +=========== + +I picked up this code from Dave Grothe and enhanced it. + +If you make some really cool modification to this stuff, or if you +fix a bug, please let me know. + +Amit S. Kale + + +(First kgdb by David Grothe ) + +(modified by Tigran Aivazian ) + Putting gdbstub into the kernel config menu. + +(modified by Scott Foehner ) + Hooks for entering gdbstub at boot time. + +(modified by Amit S. Kale ) + Threads, ia-32 hw debugging, mp support, console support, + nmi watchdog handling. diff -urpN -X /home/fletch/.diff.exclude 000-virgin/Documentation/sysrq.txt 901-mjb1.1/Documentation/sysrq.txt --- 000-virgin/Documentation/sysrq.txt Wed Mar 26 22:54:28 2003 +++ 901-mjb1.1/Documentation/sysrq.txt Wed Aug 13 20:29:29 2003 @@ -77,6 +77,8 @@ On all - write a character to /proc/sys 'l' - Send a SIGKILL to all processes, INCLUDING init. (Your system will be non-functional after this.) +'g' - Enter the kernel debugger (if configured and supported). + 'h' - Will display help ( actually any other key than those listed above will display help. 
but 'h' is easy to remember :-) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/Makefile 901-mjb1.1/Makefile --- 000-virgin/Makefile Wed Aug 13 20:24:17 2003 +++ 901-mjb1.1/Makefile Wed Aug 13 21:09:46 2003 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -test3 +EXTRAVERSION = -test3-mjb1 # *DOCUMENTATION* # To see a list of typical targets execute "make help" @@ -76,6 +76,8 @@ HOSTCXX = g++ HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer HOSTCXXFLAGS = -O2 +GCOV_FLAGS = -fprofile-arcs -ftest-coverage + # That's our default target when none is given on the command line # Note that 'modules' will be added as a prerequisite as well, @@ -235,6 +237,8 @@ export VERSION PATCHLEVEL SUBLEVEL EXTRA export CPPFLAGS NOSTDINC_FLAGS OBJCOPYFLAGS LDFLAGS export CFLAGS CFLAGS_KERNEL CFLAGS_MODULE export AFLAGS AFLAGS_KERNEL AFLAGS_MODULE +export CFLAGS_NOGCOV + export MODVERDIR := .tmp_versions @@ -326,6 +330,10 @@ ifdef CONFIG_DEBUG_INFO CFLAGS += -g endif +ifdef CONFIG_DEBUG_SYMBOLS +CFLAGS += -g +endif + # # INSTALL_PATH specifies where to place the updated kernel and system map # images. Uncomment if you want to place them anywhere other than root. @@ -534,6 +542,11 @@ depend dep: # --------------------------------------------------------------------------- # Modules +CFLAGS_NOGCOV := $(CFLAGS) +ifdef CONFIG_GCOV_ALL +CFLAGS += $(GCOV_FLAGS) +endif + ifdef CONFIG_MODULES # By default, build modules as well @@ -716,6 +729,7 @@ clean: archclean $(clean-dirs) $(call cmd,rmclean) @find . $(RCS_FIND_IGNORE) \ \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \ + -o -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \) \ -type f -print | xargs rm -f diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/Kconfig 901-mjb1.1/arch/i386/Kconfig --- 000-virgin/arch/i386/Kconfig Wed Aug 13 20:24:18 2003 +++ 901-mjb1.1/arch/i386/Kconfig Wed Aug 13 21:05:43 2003 @@ -449,17 +449,17 @@ config NR_CPUS This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. -config PREEMPT - bool "Preemptible Kernel" - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. +# config PREEMPT +# bool "Preemptible Kernel" +# help +# This option reduces the latency of the kernel when reacting to +# real-time or interactive events by allowing a low priority process to +# be preempted even if it is in kernel mode executing a system call. +# This allows applications to run more reliably even when the system is +# under load. +# +# Say Y here if you are building a kernel for a desktop, embedded +# or real-time system. Say N if you are unsure. config X86_UP_APIC bool "Local APIC support on uniprocessors" if !SMP @@ -680,6 +680,43 @@ config HIGHMEM64G endchoice +choice + help + On i386, a process can only virtually address 4GB of memory. This + lets you select how much of that virtual space you would like to + devoted to userspace, and how much to the kernel. + + Some userspace programs would like to address as much as possible and + have few demands of the kernel other than it get out of the way. 
These + users may opt to use the 3.5GB option to give their userspace program + as much room as possible. Due to alignment issues imposed by PAE, + the "3.5GB" option is unavailable if "64GB" high memory support is + enabled. + + Other users (especially those who use PAE) may be running out of + ZONE_NORMAL memory. Those users may benefit from increasing the + kernel's virtual address space size by taking it away from userspace, + which may not need all of its space. An indicator that this is + happening is when /proc/Meminfo's "LowFree:" is a small percentage of + "LowTotal:" while "HighFree:" is very large. + + If unsure, say "3GB" + prompt "User address space size" + default 1GB + +config 05GB + bool "3.5 GB" + +config 1GB + bool "3 GB" + +config 2GB + bool "2 GB" + +config 3GB + bool "1 GB" +endchoice + config HIGHMEM bool depends on HIGHMEM64G || HIGHMEM4G @@ -697,6 +734,11 @@ config NUMA default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT) +config NUMA_SCHED + bool "Numa Scheduling Support" + depends on NUMA + default y + # Need comments to help the hapless user trying to turn on NUMA support comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support" depends on X86_NUMAQ && (!HIGHMEM64G || !SMP) @@ -782,6 +824,33 @@ config MTRR See for more information. +choice + help + This is unrelated to your processor's speed. This variable alters + how often the system is asked to generate timer interrupts. A larger + value can lead to a more responsive system, but also causes extra + overhead from the increased number of context switches. + + If in doubt, leave it at the default of 1000. + + prompt "Kernel HZ" + default 1000HZ + +config 100HZ + bool "100 Hz" + +config 1000HZ + bool "1000 Hz" +endchoice + +config IRQBALANCE + bool "Enable kernel irq balancing" + depends on SMP + default y + help + The defalut yes will allow the kernel to do irq load balancing. + Saying no will keep the kernel from doing irq load balancing. + config HAVE_DEC_LOCK bool depends on (SMP || PREEMPT) && X86_CMPXCHG @@ -1245,6 +1314,36 @@ source "net/bluetooth/Kconfig" source "arch/i386/oprofile/Kconfig" +menu "GCOV coverage profiling" + +config GCOV_PROFILE + bool "GCOV coverage profiling" + ---help--- + Provide infrastructure for coverage support for the kernel. This + will not compile the kernel by default with the necessary flags. + To obtain coverage information for the entire kernel, one should + enable the subsequent option (Profile entire kernel). If only + particular files or directories of the kernel are desired, then + one must provide the following compile options for such targets: + "-fprofile-arcs -ftest-coverage" in the CFLAGS. To obtain + access to the coverage data one must insmod the gcov-proc kernel + module. + +config GCOV_ALL + bool "GCOV_ALL" + depends on GCOV_PROFILE + ---help--- + If you say Y here, it will compile the entire kernel with coverage + option enabled. + +config GCOV_PROC + tristate "gcov-proc module" + depends on GCOV_PROFILE && PROC_FS + ---help--- + This is the gcov-proc module that exposes gcov data through the + /proc filesystem + +endmenu menu "Kernel hacking" @@ -1254,6 +1353,14 @@ config DEBUG_KERNEL Say Y here if you are developing drivers or trying to debug and identify kernel problems. 
+config DEBUG_SYMBOLS_PROMPT + bool "Get debug symbols (turns on -g)" + depends on DEBUG_KERNEL + +config DEBUG_SYMBOLS + bool + depends on DEBUG_SYMBOLS_PROMPT || X86_REMOTE_DEBUG + config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL @@ -1266,6 +1373,17 @@ config DEBUG_SLAB allocation as well as poisoning memory on free to catch use of freed memory. +config X86_REMOTE_DEBUG + bool "KGDB: Remote (serial) kernel debugging with gdb" + +config KGDB_THREAD + bool "KGDB: Thread analysis" + depends on X86_REMOTE_DEBUG + +config GDB_CONSOLE + bool "KGDB: Console messages through gdb" + depends on X86_REMOTE_DEBUG + config DEBUG_IOVIRT bool "Memory mapped I/O debugging" depends on DEBUG_KERNEL @@ -1291,6 +1409,26 @@ config MAGIC_SYSRQ keys are documented in . Don't say Y unless you really know what this hack does. +config X86_EARLY_PRINTK + bool "Early console support" + default n + depends on DEBUG_KERNEL + help + Write kernel log output directly into the VGA buffer or serial port. + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. For normal operation + it is not recommended because it looks ugly and doesn't cooperate + with klogd/syslogd or the X server.You should normally N here, + unless you want to debug such a crash. + + Syntax: earlyprintk=vga + earlyprintk=serial[,ttySn[,baudrate]] + Append ,keep to not disable it when the real console takes over. + Only vga or serial at a time, not both. + Currently only ttyS0 and ttyS1 are supported. + Interaction with the standard serial driver is not very good. + The VGA output is eventually overwritten by the real console. + config DEBUG_SPINLOCK bool "Spinlock debugging" depends on DEBUG_KERNEL @@ -1308,6 +1446,15 @@ config DEBUG_PAGEALLOC This results in a large slowdown, but helps to find certain types of memory corruptions. +config SPINLINE + bool "Spinlock inlining" + depends on DEBUG_KERNEL + help + This will change spinlocks from out of line to inline, making them + account cost to the callers in readprofile, rather than the lock + itself (as ".text.lock.filename"). This can be helpful for finding + the callers of locks. + config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM @@ -1330,8 +1477,18 @@ config DEBUG_SPINLOCK_SLEEP If you say Y here, various routines which may sleep will become very noisy if they are called with a spinlock held. +config LOCKMETER + bool "Kernel lock metering" + depends on SMP + help + Say Y to enable kernel lock metering, which adds overhead to SMP + locks, but allows you to see various statistics using the lockstat + command + config FRAME_POINTER - bool "Compile the kernel with frame pointers" + bool + default y if X86_REMOTE_DEBUG + default n if !X86_REMOTE_DEBUG help If you say Y here the resulting kernel image will be slightly larger and slower, but it will give very useful debugging information. 
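The GCOV options added in the Kconfig hunk above are used roughly as follows.
This is an illustrative sketch only: the module name, make targets and /proc
location are taken from the help text and from common gcov-kernel usage, not
from verified paths in this patch.

    # enable GCOV_PROFILE (plus GCOV_ALL for whole-kernel coverage) and
    # build GCOV_PROC as a module, then rebuild
    make menuconfig
    make bzImage modules modules_install

    # after booting the instrumented kernel, load the /proc interface
    modprobe gcov-proc
    ls /proc/gcov        # assumed location of the exported coverage data

For coverage of only selected files or directories, leave GCOV_ALL off and add
"-fprofile-arcs -ftest-coverage" to the CFLAGS of just those objects, as the
GCOV_PROFILE help text describes; the top-level Makefile change above removes
the generated *.bb, *.bbg and *.da files on "make clean".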
diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/Makefile 901-mjb1.1/arch/i386/Makefile --- 000-virgin/arch/i386/Makefile Tue Aug 5 20:01:48 2003 +++ 901-mjb1.1/arch/i386/Makefile Wed Aug 13 20:27:43 2003 @@ -97,6 +97,7 @@ drivers-$(CONFIG_OPROFILE) += arch/i386 CFLAGS += $(mflags-y) AFLAGS += $(mflags-y) +AFLAGS_vmlinux.lds.o += -imacros $(TOPDIR)/include/asm-i386/page.h boot := arch/i386/boot diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/boot/compressed/Makefile 901-mjb1.1/arch/i386/boot/compressed/Makefile --- 000-virgin/arch/i386/boot/compressed/Makefile Mon Mar 17 21:43:38 2003 +++ 901-mjb1.1/arch/i386/boot/compressed/Makefile Wed Aug 13 20:51:56 2003 @@ -7,6 +7,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o EXTRA_AFLAGS := -traditional +CFLAGS := $(CFLAGS_NOGCOV) LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 $(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/Makefile 901-mjb1.1/arch/i386/kernel/Makefile --- 000-virgin/arch/i386/kernel/Makefile Wed Aug 13 20:24:18 2003 +++ 901-mjb1.1/arch/i386/kernel/Makefile Wed Aug 13 20:29:29 2003 @@ -17,6 +17,7 @@ obj-$(CONFIG_MCA) += mca.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_X86_REMOTE_DEBUG) += gdbstub.o obj-$(CONFIG_PM) += suspend.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o @@ -31,6 +32,14 @@ obj-$(CONFIG_EDD) += edd.o obj-$(CONFIG_MODULES) += module.o obj-y += sysenter.o vsyscall.o obj-$(CONFIG_ACPI_SRAT) += srat.o + +ifdef CONFIG_X86_REMOTE_DEBUG +GDBSTART=gdbstart +GDBCLEAN= -rm -f gdbstart /sbin/gdbstart +else +GDBSTART= +GDBCLEAN= +endif EXTRA_AFLAGS := -traditional diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/apic.c 901-mjb1.1/arch/i386/kernel/apic.c --- 000-virgin/arch/i386/kernel/apic.c Wed Jul 2 21:59:03 2003 +++ 901-mjb1.1/arch/i386/kernel/apic.c Wed Aug 13 20:51:40 2003 @@ -985,7 +985,7 @@ int setup_profiling_timer(unsigned int m * multiplier is 1 and it can be changed by writing the new multiplier * value into /proc/profile. */ - +extern void calc_load_cpu(int cpu); inline void smp_local_timer_interrupt(struct pt_regs * regs) { int cpu = smp_processor_id(); @@ -1013,6 +1013,7 @@ inline void smp_local_timer_interrupt(st #ifdef CONFIG_SMP update_process_times(user_mode(regs)); + calc_load_cpu(cpu); #endif } diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/cpu/common.c 901-mjb1.1/arch/i386/kernel/cpu/common.c --- 000-virgin/arch/i386/kernel/cpu/common.c Wed Aug 13 20:24:18 2003 +++ 901-mjb1.1/arch/i386/kernel/cpu/common.c Wed Aug 13 20:48:49 2003 @@ -454,9 +454,9 @@ void __init early_cpu_init(void) } /* * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. + * initialized (naturally) in the bootstrap process, such as the GDT. + * We reload them nevertheless, this function acts as a 'CPU state barrier', + * nothing should get across. 
*/ void __init cpu_init (void) { @@ -480,8 +480,8 @@ void __init cpu_init (void) } /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: + * Initialize the per-CPU GDTs with the boot equivalents, + * and set up the descriptors: */ if (cpu) { memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); @@ -494,7 +494,6 @@ void __init cpu_init (void) memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8); __asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu])); - __asm__ __volatile__("lidt %0": "=m" (idt_descr)); /* * Delete NT @@ -538,3 +537,31 @@ void __init cpu_init (void) current->used_math = 0; stts(); } + +/* + * copy over the boot node idt across all nodes, we currently only have + * non-unique idt entries for device io interrupts. + */ +void __init setup_node_idts(void) +{ + int node = MAX_NUMNODES; + + /* we can skip setting up node0 since it's done in head.S */ + while (--node) { + memcpy(node_idt_table[node], node_idt_table[0], IDT_SIZE); + node_idt_descr[node].size = IDT_SIZE - 1; + node_idt_descr[node].address = (unsigned long)node_idt_table[node]; + } +} + +void __init setup_cpu_idt(void) +{ + int cpu = smp_processor_id(), node = cpu_to_node(cpu); + + printk(KERN_DEBUG "CPU%d IDT at 0x%08lx\n", + cpu, node_idt_descr[node].address); + + /* reload the idt on all processors as they come up */ + __asm__ __volatile__("lidt %0": "=m" (node_idt_descr[node])); +} + diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/doublefault.c 901-mjb1.1/arch/i386/kernel/doublefault.c --- 000-virgin/arch/i386/kernel/doublefault.c Tue Feb 25 23:03:43 2003 +++ 901-mjb1.1/arch/i386/kernel/doublefault.c Wed Aug 13 20:48:49 2003 @@ -16,7 +16,7 @@ static unsigned long doublefault_stack[D static void doublefault_fn(void) { - struct Xgt_desc_struct gdt_desc = {0, 0}; + struct Xdt_desc_struct gdt_desc = {0, 0}; unsigned long gdt, tss; __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory"); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/entry.S 901-mjb1.1/arch/i386/kernel/entry.S --- 000-virgin/arch/i386/kernel/entry.S Tue Aug 5 20:01:41 2003 +++ 901-mjb1.1/arch/i386/kernel/entry.S Wed Aug 13 20:51:50 2003 @@ -49,6 +49,10 @@ #include #include "irq_vectors.h" +#ifndef CONFIG_KGDB_THREAD +#define user_schedule schedule +#endif + EBX = 0x00 ECX = 0x04 EDX = 0x08 @@ -224,7 +228,7 @@ need_resched: jz restore_all movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebp) sti - call schedule + call user_schedule movl $0,TI_PRE_COUNT(%ebp) cli jmp need_resched @@ -306,7 +310,7 @@ work_pending: testb $_TIF_NEED_RESCHED, %cl jz work_notifysig work_resched: - call schedule + call user_schedule cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -572,6 +576,31 @@ ENTRY(invalid_TSS) pushl $do_invalid_TSS jmp error_code +#ifdef CONFIG_KGDB_THREAD +ENTRY(kern_schedule) + pushl %ebp + movl %esp, %ebp + pushl %ss + pushl %ebp + pushfl + pushl %cs + pushl 4(%ebp) + pushl %eax + pushl %es + pushl %ds + pushl %eax + pushl (%ebp) + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + call kern_do_schedule + movl %ebp, %esp + pop %ebp + ret +#endif + ENTRY(segment_not_present) pushl $do_segment_not_present jmp error_code @@ -829,7 +858,7 @@ ENTRY(sys_call_table) .long sys_getdents64 /* 220 */ .long sys_fcntl64 .long sys_ni_syscall /* reserved for TUX */ - .long sys_ni_syscall + .long sys_mbind .long sys_gettid .long sys_readahead /* 225 */ .long sys_setxattr diff -urpN -X 
/home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/gdbstart.c 901-mjb1.1/arch/i386/kernel/gdbstart.c --- 000-virgin/arch/i386/kernel/gdbstart.c Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/arch/i386/kernel/gdbstart.c Wed Aug 13 20:29:29 2003 @@ -0,0 +1,147 @@ +/* + * This program opens a tty file and issues the GDB stub activating + * ioctl on it. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +char *tty_name = "/dev/ttyS0" ; /* COM1 port */ +int speed = 9600 ; /* default speed */ +struct termios save_ts ; /* original term struct */ + +void print_usage(void) +{ + printf("gdbstub [-s speed] [-t tty-dev]\n") ; + printf(" defaults: /dev/ttyS0 with speed unmodified by this program\n"); + +} /* print_usage */ + +void tty_err(char *msg) +{ + char buf[100] ; + + strcpy(buf, msg) ; + strcat(buf, ": ") ; + strcat(buf, tty_name) ; + perror(buf) ; + exit(1) ; + +} /* tty_err */ + + +void setup_term(int fd) +{ + struct termios ts ; + int speed_code ; + + if (tcgetattr(fd, &ts) < 0) tty_err("tcgetattr") ; + + save_ts = ts ; + switch (speed) + { + case 4800: + speed_code = B4800 ; + break ; + case 9600: + speed_code = B9600 ; + break ; + case 19200: + speed_code = B19200 ; + break ; + case 38400: + speed_code = B38400 ; + break ; + case 57600: + speed_code = B57600 ; + break ; + case 115200: + speed_code = B115200 ; + break ; + case 230400: + speed_code = B230400 ; + break ; + default: + printf("Invalid speed: %d\n", speed) ; + exit(1) ; + } + + ts.c_cflag = CS8 | CREAD | CLOCAL ; + if (cfsetospeed(&ts, speed_code) < 0) tty_err("cfsetospeed") ; + if (cfsetispeed(&ts, speed_code) < 0) tty_err("cfsetispeed") ; + + if (tcsetattr(fd, TCSANOW, &ts) < 0) tty_err("tcsetattr") ; + +} /* setup_term */ + +int main(int argc, char **argv) +{ + int opt ; + int fil ; + int rslt ; + + while ((opt = getopt(argc, argv, "hs:t:")) > 0) + { + switch (opt) + { + case 's': + speed = atol(optarg) ; + break ; + case 't': + tty_name = optarg ; + break ; + case ':': + printf("Invalid option\n") ; + break ; + case '?': + case 'h': + default: + print_usage() ; + return 1; + } + } + + fil = open(tty_name, O_RDWR) ; + if (fil < 0) + { + perror(tty_name) ; + return 1; + } + + + setup_term(fil) ; + + /* + * When we issue this ioctl, control will not return until + * the debugger running on the remote host machine says "go". + */ + printf("\nAbout to activate GDB stub in the kernel on %s\n", tty_name) ; + printf("Hit CR to continue, kill program to abort -- ") ; + getchar() ; + sync() ; + rslt = ioctl(fil, TIOCGDB, 0) ; + if (rslt < 0) + { + perror("TIOCGDB ioctl") ; + return 1; + } + + printf("\nGDB stub successfully activated\n") ; + + for (;;) + { + pause() ; + } + + if (tcsetattr(fil, TCSANOW, &save_ts) < 0) tty_err("tcsetattr") ; + + exit(0); +} /* main */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/gdbstub.c 901-mjb1.1/arch/i386/kernel/gdbstub.c --- 000-virgin/arch/i386/kernel/gdbstub.c Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/arch/i386/kernel/gdbstub.c Wed Aug 13 20:29:29 2003 @@ -0,0 +1,1208 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (C) 2000-2001 VERITAS Software Corporation. + */ +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * Updated by: Amit Kale + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Origianl kgdb, compatibility with 2.1.xx kernel by David Grothe + * Integrated into 2.2.5 kernel by Tigran Aivazian + * thread support, + * support for multiple processors, + * support for ia-32(x86) hardware debugging, + * Console support, + * handling nmi watchdog + * Amit S. Kale ( akale@veritas.com ) + * + * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing an int 3. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? SNN (signal NN) + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $#. + * + * where + * :: + * :: < two hex digits computed as modulo 256 sum of > + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer. + * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#ifdef CONFIG_GDB_CONSOLE +#include +#endif +#include + +/************************************************************************ + * + * external low-level support routines + */ +typedef void (*Function) (void); /* pointer to a function */ + +/* Thread reference */ +typedef unsigned char threadref[8]; + +extern int putDebugChar(int); /* write a single character */ +extern int getDebugChar(void); /* read and return a single char */ + +extern int pid_max; + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 1024 + +static char initialized; /* boolean flag. 
!= 0 means we've been initialized */ + +static const char hexchars[] = "0123456789abcdef"; + +/* Number of bytes of registers. */ +#define NUMREGBYTES 64 +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + */ +enum regnames { _EAX, /* 0 */ + _ECX, /* 1 */ + _EDX, /* 2 */ + _EBX, /* 3 */ + _ESP, /* 4 */ + _EBP, /* 5 */ + _ESI, /* 6 */ + _EDI, /* 7 */ + _PC /* 8 also known as eip */ , + _PS /* 9 also known as eflags */ , + _CS, /* 10 */ + _SS, /* 11 */ + _DS, /* 12 */ + _ES, /* 13 */ + _FS, /* 14 */ + _GS +}; /* 15 */ + +/*************************** ASSEMBLY CODE MACROS *************************/ +/* */ + +#define BREAKPOINT() asm(" int $3"); + +/* Put the error code here just in case the user cares. */ +int gdb_i386errcode; +/* Likewise, the vector number here (since GDB only gets the signal + number through the usual means, and that's not very specific). */ +int gdb_i386vector = -1; + +static spinlock_t slavecpulocks[KGDB_MAX_NO_CPUS]; +volatile int procindebug[KGDB_MAX_NO_CPUS]; + +#ifdef CONFIG_SMP +spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; +spinlock_t kgdb_nmispinlock = SPIN_LOCK_UNLOCKED; +#else +unsigned kgdb_spinlock = 0; +unsigned kgdb_nmispinlock = 0; +#endif + +static void +kgdb_usercode(void) +{ +} + +int +hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return (ch - 'a' + 10); + if ((ch >= '0') && (ch <= '9')) + return (ch - '0'); + if ((ch >= 'A') && (ch <= 'F')) + return (ch - 'A' + 10); + return (-1); +} + +/* scan for the sequence $# */ +void +getpacket(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + char ch; + + do { + /* wait around for the start character, ignore all other characters */ + while ((ch = (getDebugChar() & 0x7f)) != '$') ; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < BUFMAX) { + ch = getDebugChar() & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(getDebugChar() & 0x7f) << 4; + xmitcsum += hex(getDebugChar() & 0x7f); + + if (checksum != xmitcsum) + putDebugChar('-'); /* failed checksum */ + else { + putDebugChar('+'); /* successful transfer */ + /* if a sequence char is present, reply the sequence ID */ + if (buffer[2] == ':') { + putDebugChar(buffer[0]); + putDebugChar(buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(buffer); + for (i = 3; i <= count; i++) + buffer[i - 3] = buffer[i]; + } + } + } + } while (checksum != xmitcsum); + +} + +/* send the packet in buffer. */ + +void +putpacket(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + /* $#. 
*/ + do { + putDebugChar('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + if (!putDebugChar(ch)) + return; + checksum += ch; + count += 1; + } + + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + + } while ((getDebugChar() & 0x7f) != '+'); + +} + +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; +static short error; + +static void +regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_EAX] = regs->eax; + gdb_regs[_EBX] = regs->ebx; + gdb_regs[_ECX] = regs->ecx; + gdb_regs[_EDX] = regs->edx; + gdb_regs[_ESI] = regs->esi; + gdb_regs[_EDI] = regs->edi; + gdb_regs[_EBP] = regs->ebp; + gdb_regs[_DS] = regs->xds; + gdb_regs[_ES] = regs->xes; + gdb_regs[_PS] = regs->eflags; + gdb_regs[_CS] = regs->xcs; + gdb_regs[_PC] = regs->eip; + gdb_regs[_ESP] = (int) (®s->esp); + gdb_regs[_SS] = __KERNEL_DS; + gdb_regs[_FS] = 0xFFFF; + gdb_regs[_GS] = 0xFFFF; +} /* regs_to_gdb_regs */ + +static void +gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) +{ + regs->eax = gdb_regs[_EAX]; + regs->ebx = gdb_regs[_EBX]; + regs->ecx = gdb_regs[_ECX]; + regs->edx = gdb_regs[_EDX]; + regs->esi = gdb_regs[_ESI]; + regs->edi = gdb_regs[_EDI]; + regs->ebp = gdb_regs[_EBP]; + regs->xds = gdb_regs[_DS]; + regs->xes = gdb_regs[_ES]; + regs->eflags = gdb_regs[_PS]; + regs->xcs = gdb_regs[_CS]; + regs->eip = gdb_regs[_PC]; +#if 0 /* can't change these */ + regs->esp = gdb_regs[_ESP]; + regs->xss = gdb_regs[_SS]; + regs->fs = gdb_regs[_FS]; + regs->gs = gdb_regs[_GS]; +#endif + +} /* gdb_regs_to_regs */ + +/* Indicate to caller of mem2hex or hex2mem that there has been an + error. */ +static volatile int kgdb_memerr = 0; +volatile int kgdb_memerr_expected = 0; +static volatile int kgdb_memerr_cnt = 0; +static int garbage_loc = -1; + +int +get_char(char *addr) +{ + return *addr; +} + +void +set_char(char *addr, int val) +{ + *addr = val; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* return a pointer to the last char put in buf (null) */ +/* If MAY_FAULT is non-zero, then we should set kgdb_memerr in response to + a fault; if zero treat a fault like any other fault in the stub. 
*/ +char * +mem2hex(char *mem, char *buf, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + kgdb_memerr_expected = 1; + kgdb_memerr = 0; + } + for (i = 0; i < count; i++) { + + ch = get_char(mem++); + + if (may_fault && kgdb_memerr) { + *buf = 0; /* truncate buffer */ + return (buf); + } + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (may_fault) + kgdb_memerr_expected = 0; + return (buf); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return a pointer to the character AFTER the last byte written */ +char * +hex2mem(char *buf, char *mem, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + kgdb_memerr_expected = 1; + kgdb_memerr = 0; + } + for (i = 0; i < count; i++) { + ch = hex(*buf++) << 4; + ch = ch + hex(*buf++); + set_char(mem++, ch); + + if (may_fault && kgdb_memerr) { + return (mem); + } + } + if (may_fault) + kgdb_memerr_expected = 0; + return (mem); +} + +/**********************************************/ +/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ +/* RETURN NUMBER OF CHARS PROCESSED */ +/**********************************************/ +int +hexToInt(char **ptr, int *intValue) +{ + int numChars = 0; + int hexValue; + + *intValue = 0; + + while (**ptr) { + hexValue = hex(**ptr); + if (hexValue >= 0) { + *intValue = (*intValue << 4) | hexValue; + numChars++; + } else + break; + + (*ptr)++; + } + + return (numChars); +} + +#ifdef CONFIG_KGDB_THREAD +static int +stubhex(int ch) +{ + if (ch >= 'a' && ch <= 'f') + return ch - 'a' + 10; + if (ch >= '0' && ch <= '9') + return ch - '0'; + if (ch >= 'A' && ch <= 'F') + return ch - 'A' + 10; + return -1; +} + +static int +stub_unpack_int(char *buff, int fieldlength) +{ + int nibble; + int retval = 0; + + while (fieldlength) { + nibble = stubhex(*buff++); + retval |= nibble; + fieldlength--; + if (fieldlength) + retval = retval << 4; + } + return retval; +} +#endif + +static char * +pack_hex_byte(char *pkt, int byte) +{ + *pkt++ = hexchars[(byte >> 4) & 0xf]; + *pkt++ = hexchars[(byte & 0xf)]; + return pkt; +} + +#define BUF_THREAD_ID_SIZE 16 + +#ifdef CONFIG_KGDB_THREAD +static char * +pack_threadid(char *pkt, threadref * id) +{ + char *limit; + unsigned char *altid; + + altid = (unsigned char *) id; + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *altid++); + return pkt; +} + +static char * +unpack_byte(char *buf, int *value) +{ + *value = stub_unpack_int(buf, 2); + return buf + 2; +} + +static char * +unpack_threadid(char *inbuf, threadref * id) +{ + char *altref; + char *limit = inbuf + BUF_THREAD_ID_SIZE; + int x, y; + + altref = (char *) id; + + while (inbuf < limit) { + x = stubhex(*inbuf++); + y = stubhex(*inbuf++); + *altref++ = (x << 4) | y; + } + return inbuf; +} +#endif + +void +int_to_threadref(threadref * id, int value) +{ + unsigned char *scan; + + scan = (unsigned char *) id; + { + int i = 4; + while (i--) + *scan++ = 0; + } + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} + +#ifdef CONFIG_KGDB_THREAD +static int +threadref_to_int(threadref * ref) +{ + int i, value = 0; + unsigned char *scan; + + scan = (char *) ref; + scan += 4; + i = 4; + while (i-- > 0) + value = (value << 8) | ((*scan++) & 0xff); + return value; +} + +struct task_struct * +getthread(int pid) +{ + struct task_struct *thread; + thread = find_task_by_pid(pid); + if (thread) { + return thread; + } +#if 0 + 
thread = init_tasks[0]; + do { + if (thread->pid == pid) { + return thread; + } + thread = thread->next_task; + } while (thread != init_tasks[0]); +#endif + return NULL; +} +#endif + +struct hw_breakpoint { + unsigned enabled; + unsigned type; + unsigned len; + unsigned addr; +} breakinfo[4] = { { +enabled:0}, { +enabled:0}, { +enabled:0}, { +enabled:0}}; + +void +correct_hw_break(void) +{ + int breakno; + int correctit; + int breakbit; + unsigned dr7; + + asm volatile ("movl %%db7, %0\n":"=r" (dr7) + :); + do { + unsigned addr0, addr1, addr2, addr3; + asm volatile ("movl %%db0, %0\n" + "movl %%db1, %1\n" + "movl %%db2, %2\n" + "movl %%db3, %3\n":"=r" (addr0), "=r"(addr1), + "=r"(addr2), "=r"(addr3):); + } while (0); + correctit = 0; + for (breakno = 0; breakno < 3; breakno++) { + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + dr7 |= (((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << 16) << + (breakno << 2); + switch (breakno) { + case 0: + asm volatile ("movl %0, %%dr0\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 1: + asm volatile ("movl %0, %%dr1\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 2: + asm volatile ("movl %0, %%dr2\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 3: + asm volatile ("movl %0, %%dr3\n"::"r" + (breakinfo[breakno].addr)); + break; + } + } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + } + } + if (correctit) { + asm volatile ("movl %0, %%db7\n"::"r" (dr7)); + } +} + +int +remove_hw_break(unsigned breakno) +{ + if (!breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 0; + return 0; +} + +int +set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) +{ + if (breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 1; + breakinfo[breakno].type = type; + breakinfo[breakno].len = len; + breakinfo[breakno].addr = addr; + return 0; +} + +void +gdb_wait(void *arg) +{ + unsigned long flags; + int processor; + + local_irq_save(flags); + processor = smp_processor_id(); + procindebug[processor] = 1; + current->thread.kgdbregs = arg; + spin_lock(slavecpulocks + processor); + correct_hw_break(); + procindebug[processor] = 0; + local_irq_restore(flags); +} + +void +printexceptioninfo(int exceptionNo, int errorcode, char *buffer) +{ + unsigned dr6; + int i; + switch (exceptionNo) { + case 1: /* debug exception */ + break; + case 3: /* breakpoint */ + sprintf(buffer, "Software breakpoint"); + return; + default: + sprintf(buffer, "Details not available"); + return; + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (dr6 & 0x4000) { + sprintf(buffer, "Single step"); + return; + } + for (i = 0; i < 4; ++i) { + if (dr6 & (1 << i)) { + sprintf(buffer, "Hardware breakpoint %d", i); + return; + } + } + sprintf(buffer, "Unknown trap"); + return; +} + +/* + * This function does all command procesing for interfacing to gdb. + * + * NOTE: The INT nn instruction leaves the state of the interrupt + * enable flag UNCHANGED. That means that when this routine + * is entered via a breakpoint (INT 3) instruction from code + * that has interrupts enabled, then interrupts will STILL BE + * enabled when this routine is entered. 
The first thing that + * we do here is disable interrupts so as to prevent recursive + * entries and bothersome serial interrupts while we are + * trying to run the serial port in polled mode. + * + * For kernel version 2.1.xx the cli() actually gets a spin lock so + * it is always necessary to do a restore_flags before returning + * so as to let go of that lock. + */ +int +handle_exception(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs) +{ + struct task_struct *usethread = NULL; + int addr, length; + int breakno, breaktype; + char *ptr; + int newPC; + unsigned long flags = ~0UL; + int gdb_regs[NUMREGBYTES / 4]; + int i; + int dr6; + int reboot = 0; +#ifdef CONFIG_KGDB_THREAD + int nothreads; + int maxthreads; + int threadid; + threadref thref; + struct task_struct *thread = NULL; +#endif +#define regs (*linux_regs) + + /* + * If the entry is not from the kernel then return to the Linux + * trap handler and let it process the interrupt normally. + */ + if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { + return (0); + } + + if (kgdb_memerr_expected) { + /* + * This fault occured because of the get_char or set_char + * routines. These two routines use either eax of edx to + * indirectly reference the location in memory that they + * are working with. For a page fault, when we return + * the instruction will be retried, so we have to make + * sure that these registers point to valid memory. + */ + kgdb_memerr = 1; /* set mem error flag */ + kgdb_memerr_expected = 0; + kgdb_memerr_cnt++; /* helps in debugging */ + regs.eax = (long) &garbage_loc; /* make valid address */ + regs.edx = (long) &garbage_loc; /* make valid address */ + return (0); + } +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_nmispinlock)) +#else + if (!kgdb_nmispinlock) +#endif + { + + /* Get kgdb spinlock */ +#ifdef CONFIG_SMP + _raw_spin_lock(&kgdb_spinlock); +#else + kgdb_spinlock = 1; +#endif + + local_irq_save(flags); + + /* Disable hardware debugging while we are in kgdb */ + __asm__("movl %0,%%db7": /* no output */ + :"r"(0)); + + for (i = 0; i < NR_CPUS; i++) { + spin_lock_init(&slavecpulocks[i]); + _raw_spin_lock(&slavecpulocks[i]); + } + + if (num_online_cpus() > 1) { + /* Force other cpus in debugger */ + if (smp_call_function(gdb_wait, NULL, 0, 99) != 0) { + return (1); + } + } + + procindebug[smp_processor_id()] = 1; + } + + gdb_i386vector = exceptionVector; + gdb_i386errcode = err_code; + + /* reply to host that an exception has occurred */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + + putpacket(remcomOutBuffer); + + while (1 == 1) { + error = 0; + remcomOutBuffer[0] = 0; + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + break; + case 'g': /* return the value of the CPU registers */ + if (!usethread || usethread == current) { + regs_to_gdb_regs(gdb_regs, ®s); + } else { + memset(gdb_regs, 0, NUMREGBYTES); + if (usethread->thread.kgdbregs) { + kgdb_memerr_expected = 1; + kgdb_memerr = 0; + get_char((char *) usethread->thread. + kgdbregs); + kgdb_memerr_expected = 0; + if (kgdb_memerr) { + gdb_regs[_PC] = + (int) kgdb_usercode; + } else { + regs_to_gdb_regs(gdb_regs, + usethread-> + thread. 
+ kgdbregs); + } + } else { + gdb_regs[_PC] = (int) kgdb_usercode; + } + } + mem2hex((char *) gdb_regs, remcomOutBuffer, NUMREGBYTES, + 0); + break; + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem(&remcomInBuffer[1], (char *) gdb_regs, + NUMREGBYTES, 0); + if (!usethread || usethread == current) { + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E00"); + } + break; + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) + if (*(ptr++) == ',') + if (hexToInt(&ptr, &length)) { + ptr = 0; + mem2hex((char *) addr, + remcomOutBuffer, length, + 1); + if (kgdb_memerr) { + strcpy(remcomOutBuffer, + "E03"); + } + } + + if (ptr) { + strcpy(remcomOutBuffer, "E01"); + } + break; + + /* MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK */ + case 'M': + /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) + if (*(ptr++) == ',') + if (hexToInt(&ptr, &length)) + if (*(ptr++) == ':') { + hex2mem(ptr, + (char *) addr, + length, 1); + + if (kgdb_memerr) { + strcpy + (remcomOutBuffer, + "E03"); + } else { + strcpy + (remcomOutBuffer, + "OK"); + } + + ptr = 0; + } + if (ptr) { + strcpy(remcomOutBuffer, "E02"); + } + break; + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + case 'c': + case 's': +#ifdef CONFIG_SMP + if (spin_is_locked(&kgdb_nmispinlock)) +#else + if (kgdb_nmispinlock) +#endif + { + strcpy(remcomOutBuffer, "E01"); + break; + } + + /* try to read optional parameter, pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) { + regs.eip = addr; + } + + newPC = regs.eip; + + /* clear the trace bit */ + regs.eflags &= 0xfffffeff; + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + regs.eflags |= 0x100; + + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno)) { + if (breakinfo[breakno].type == + 0) { + /* Set restore flag */ + regs.eflags |= 0x10000; + break; + } + } + } + } + correct_hw_break(); + asm volatile ("movl %0, %%db6\n"::"r" (0)); + for (i = 0; i < NR_CPUS; i++) { + _raw_spin_unlock(&slavecpulocks[i]); + } + + procindebug[smp_processor_id()] = 0; + /* Release kgdb spinlock */ +#ifdef CONFIG_SMP + _raw_spin_unlock(&kgdb_spinlock); +#else + kgdb_spinlock = 0; +#endif + if (flags != ~0UL) + local_irq_restore(flags); + return (0); + + /* kill the program */ + case 'k': + break; + + /* query */ + case 'q': + switch (remcomInBuffer[1]) { +#ifdef CONFIG_KGDB_THREAD + case 'L': + /* List threads */ + unpack_byte(remcomInBuffer + 3, &maxthreads); + unpack_threadid(remcomInBuffer + 5, &thref); + + remcomOutBuffer[0] = 'q'; + remcomOutBuffer[1] = 'M'; + remcomOutBuffer[4] = '0'; + pack_threadid(remcomOutBuffer + 5, &thref); + + threadid = threadref_to_int(&thref); + for (nothreads = 0; + nothreads < maxthreads + && threadid < pid_max; threadid++) { + thread = getthread(threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(remcomOutBuffer + + 21 + + nothreads * 16, + &thref); + nothreads++; + } + } + if (threadid == pid_max) { + remcomOutBuffer[4] = '1'; + } + pack_hex_byte(remcomOutBuffer + 2, nothreads); + remcomOutBuffer[21 + nothreads * 16] = '\0'; + break; + + case 'C': + /* Current thread id 
*/ + remcomOutBuffer[0] = 'Q'; + remcomOutBuffer[1] = 'C'; + threadid = current->pid; + int_to_threadref(&thref, threadid); + pack_threadid(remcomOutBuffer + 2, &thref); + remcomOutBuffer[18] = '\0'; + break; +#endif + + case 'E': + /* Print exception info */ + printexceptioninfo(exceptionVector, err_code, + remcomOutBuffer); + break; + } + break; + +#ifdef CONFIG_KGDB_THREAD + /* task related */ + case 'H': + switch (remcomInBuffer[1]) { + case 'g': + ptr = &remcomInBuffer[2]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (!thread) { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + break; + } + usethread = thread; + /* follow through */ + case 'c': + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (thread) { + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + } else { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + } + break; +#endif + + case 'r': + reboot = 1; + strcpy(remcomOutBuffer, "OK"); + break; + case 'Y': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + ptr++; + hexToInt(&ptr, &breaktype); + ptr++; + hexToInt(&ptr, &length); + ptr++; + hexToInt(&ptr, &addr); + if (set_hw_break + (breakno & 0x3, breaktype & 0x3, length & 0x3, addr) + == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + /* Remove hardware breakpoint */ + case 'y': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + if (remove_hw_break(breakno & 0x3) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + } /* switch */ + + /* reply to the request */ + putpacket(remcomOutBuffer); + if (reboot == 1) { + static long no_idt[2]; + __asm__ __volatile__("lidt %0"::"m"(no_idt)); + __asm__ __volatile__("int3"); + } + } +} + +/* this function is used to set up exception handlers for tracing and + breakpoints */ +void +set_debug_traps(void) +{ + /* + * linux_debug_hook is defined in traps.c. We store a pointer + * to our own exception handler into it. + */ + linux_debug_hook = handle_exception; + + /* + * In case GDB is started before us, ack any packets (presumably + * "$?#xx") sitting there. */ + putDebugChar('+'); + + initialized = 1; +} + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. 
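
breakpoint() just below only issues the stub's BREAKPOINT() macro once the stub has been initialised. The macro itself is not defined in this hunk; on i386 it is conventionally a software breakpoint instruction, i.e. something along the lines of the sketch below (an assumption -- the real definition lives in the kgdb headers), which is consistent with printexceptioninfo() above reporting trap 3 as "Software breakpoint".

	/* assumed definition, for illustration only */
	#define BREAKPOINT()	__asm__ __volatile__ ("int $3")
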
*/ + +void +breakpoint(void) +{ + if (initialized) + BREAKPOINT(); +} + +#ifdef CONFIG_GDB_CONSOLE +char gdbconbuf[BUFMAX]; + +void +gdb_console_write(struct console *co, const char *s, unsigned count) +{ + int i; + int wcount; + char *bufptr; + + if (!gdb_initialized) { + return; + } + gdbconbuf[0] = 'O'; + bufptr = gdbconbuf + 1; + while (count > 0) { + if ((count << 1) > (BUFMAX - 2)) { + wcount = (BUFMAX - 2) >> 1; + } else { + wcount = count; + } + count -= wcount; + for (i = 0; i < wcount; i++) { + bufptr = pack_hex_byte(bufptr, s[i]); + } + *bufptr = '\0'; + s += wcount; + + putpacket(gdbconbuf); + + } +} +#endif +static int __init +kgdb_opt_gdb(char *dummy) +{ + gdb_enter = 1; + return 1; +} +static int __init +kgdb_opt_gdbttyS(char *str) +{ + gdb_ttyS = simple_strtoul(str, NULL, 10); + return 1; +} +static int __init +kgdb_opt_gdbbaud(char *str) +{ + gdb_baud = simple_strtoul(str, NULL, 10); + return 1; +} + +/* + * Sequence of these lines has to be maintained because gdb option is a prefix + * of the other two options + */ + +__setup("gdbttyS=", kgdb_opt_gdbttyS); +__setup("gdbbaud=", kgdb_opt_gdbbaud); +__setup("gdb", kgdb_opt_gdb); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/head.S 901-mjb1.1/arch/i386/kernel/head.S --- 000-virgin/arch/i386/kernel/head.S Fri May 30 19:01:58 2003 +++ 901-mjb1.1/arch/i386/kernel/head.S Wed Aug 13 20:51:56 2003 @@ -249,7 +249,7 @@ is386: movl $2,%ecx # set MP call check_x87 incb ready lgdt cpu_gdt_descr - lidt idt_descr + lidt node_idt_descr # we switch to the per-node IDTs later ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. @@ -314,7 +314,7 @@ setup_idt: movw %dx,%ax /* selector = 0x0010 = cs */ movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ - lea idt_table,%edi + lea node_idt_table,%edi mov $256,%ecx rp_sidt: movl %eax,(%edi) @@ -359,14 +359,16 @@ ignore_int: * segment size, and 32-bit linear address value: */ -.globl idt_descr +.globl node_idt_descr .globl cpu_gdt_descr ALIGN .word 0 # 32-bit align idt_desc.address -idt_descr: +node_idt_descr: .word IDT_ENTRIES*8-1 # idt contains 256 entries - .long idt_table + .long node_idt_table + + .fill MAX_NUMNODES-1,8,0 # boot GDT descriptor (later on used by CPU#0): .word 0 # 32 bit align gdt_desc.address @@ -485,5 +487,26 @@ ENTRY(cpu_gdt_table) #ifdef CONFIG_SMP .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ +#endif + +#ifdef CONFIG_GCOV_PROFILE +/* + * The .ctors-section contains a list of pointers to constructor + * functions which are used to initialize gcov structures. + * + * Because there is no NULL at the end of the constructor list + * in the kernel we need the addresses of both the constructor + * as well as the destructor list which are supposed to be + * adjacent. 
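
Because the list laid out above has no NULL terminator, any consumer has to walk from __CTOR_LIST__ up to the adjacent __DTOR_LIST__ instead of stopping at a sentinel; the same block is added to the ppc, ppc64 and x86_64 head.S files later in this patch, and the gcov-proc module is the intended consumer. A sketch of such a walk (the symbol names are the ones declared above; the walking code itself is illustrative):

	typedef void (*ctor_t)(void);

	extern ctor_t __CTOR_LIST__[];	/* first constructor pointer        */
	extern ctor_t __DTOR_LIST__[];	/* start of .dtors == end of .ctors */

	static void run_kernel_ctors(void)
	{
		ctor_t *ctor;

		/* no NULL sentinel, so stop at the adjacent destructor list */
		for (ctor = __CTOR_LIST__; ctor < __DTOR_LIST__; ctor++)
			(*ctor)();
	}
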
+ */ + +.section ".ctors","aw" +.globl __CTOR_LIST__ +.type __CTOR_LIST__,@object +__CTOR_LIST__: +.section ".dtors","aw" +.globl __DTOR_LIST__ +.type __DTOR_LIST__,@object +__DTOR_LIST__: #endif diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/i386_ksyms.c 901-mjb1.1/arch/i386/kernel/i386_ksyms.c --- 000-virgin/arch/i386/kernel/i386_ksyms.c Wed Jul 2 21:59:03 2003 +++ 901-mjb1.1/arch/i386/kernel/i386_ksyms.c Wed Aug 13 20:29:29 2003 @@ -146,6 +146,20 @@ EXPORT_SYMBOL(smp_num_siblings); EXPORT_SYMBOL(cpu_sibling_map); #endif +#ifdef CONFIG_X86_REMOTE_DEBUG +void __this_fixmap_does_not_exist(void) +{ + BUG(); +} +EXPORT_SYMBOL(__this_fixmap_does_not_exist); + +void __br_lock_usage_bug(void) +{ + BUG(); +} +EXPORT_SYMBOL(__br_lock_usage_bug); +#endif + #ifdef CONFIG_SMP EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL(cpu_online_map); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/io_apic.c 901-mjb1.1/arch/i386/kernel/io_apic.c --- 000-virgin/arch/i386/kernel/io_apic.c Tue Aug 5 20:01:48 2003 +++ 901-mjb1.1/arch/i386/kernel/io_apic.c Wed Aug 13 20:48:59 2003 @@ -272,7 +272,7 @@ static void set_ioapic_affinity (unsigne spin_unlock_irqrestore(&ioapic_lock, flags); } -#if defined(CONFIG_SMP) +#if defined(CONFIG_IRQBALANCE) # include /* kernel_thread() */ # include /* kstat */ # include /* kmalloc() */ @@ -667,8 +667,6 @@ static int __init irqbalance_disable(cha __setup("noirqbalance", irqbalance_disable); -static void set_ioapic_affinity (unsigned int irq, unsigned long mask); - static inline void move_irq(int irq) { /* note - we hold the desc->lock */ @@ -680,9 +678,11 @@ static inline void move_irq(int irq) __initcall(balanced_irq_init); -#else /* !SMP */ +#else /* !CONFIG_IRQBALANCE */ static inline void move_irq(int irq) { } +#endif /* CONFIG_IRQBALANCE */ +#ifndef CONFIG_SMP void send_IPI_self(int vector) { unsigned int cfg; @@ -697,7 +697,7 @@ void send_IPI_self(int vector) */ apic_write_around(APIC_ICR, cfg); } -#endif /* defined(CONFIG_SMP) */ +#endif /* !CONFIG_SMP */ /* @@ -1136,24 +1136,59 @@ static inline int IO_APIC_irq_trigger(in } int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 }; +int __initdata vector_allocated[MAX_NUMNODES][FIRST_SYSTEM_VECTOR]; -static int __init assign_irq_vector(int irq) -{ - static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; - if (IO_APIC_VECTOR(irq) > 0) - return IO_APIC_VECTOR(irq); +/* + * This is the per node vector allocator, it will only work for systems which + * have ioapics which can only deliver vectors to cpus on the same node and + * thus have hardware enforced ioapic/irq node affinity. + * + * However currently the only i386 systems which have this interrupt + * dispatching/servicing architecture are NUMAQ and x440. We try and 'share' + * vectors where possible to simplify cases where an irq can be serviced on + * multiple nodes due to it being present on multiple busses/nodes. + * The first pass on node0 will ensure we catch these node 'shared' irqs. + */ +static int __init assign_irq_vector(int irq, int node) +{ + static int offset[MAX_NUMNODES]; + static int nr_assigned[MAX_NUMNODES] = {[0 ... MAX_NUMNODES-1] = 1}; + static int current_vector[MAX_NUMNODES] = + {[0 ... 
MAX_NUMNODES-1] = FIRST_DEVICE_VECTOR}; + + int vector; + + Dprintk("requesting vector for node%d/irq%d\n", node, irq); + vector = IO_APIC_VECTOR(irq); + if (vector > 0) { + Dprintk("returning previous allocation vector0x%x\n", vector); + vector_allocated[node][vector]++; + return vector; + } + + if (++nr_assigned[node] > NR_IRQ_VECTORS) + return -ENOSPC; + next: - current_vector += 8; - if (current_vector == SYSCALL_VECTOR) + current_vector[node] += 8; + if (current_vector[node] == SYSCALL_VECTOR) goto next; - if (current_vector >= FIRST_SYSTEM_VECTOR) { - offset = (offset + 1) & 7; - current_vector = FIRST_DEVICE_VECTOR + offset; + if (current_vector[node] > FIRST_SYSTEM_VECTOR) { + offset[node] = (offset[node]+1) & 7; + current_vector[node] = FIRST_DEVICE_VECTOR + offset[node]; } - IO_APIC_VECTOR(irq) = current_vector; - return current_vector; + vector = current_vector[node]; + if (vector_allocated[node][vector]) + goto next; + + vector_allocated[node][vector]++; + IO_APIC_VECTOR(irq) = vector; + Dprintk("returning new allocation node%d/irq%d -> vector0x%x\n", + node, irq, vector); + + return vector; } static struct hw_interrupt_type ioapic_level_irq_type; @@ -1162,7 +1197,7 @@ static struct hw_interrupt_type ioapic_e void __init setup_IO_APIC_irqs(void) { struct IO_APIC_route_entry entry; - int apic, pin, idx, irq, first_notcon = 1, vector; + int apic, pin, idx, irq, first_notcon = 1, vector, bus, node; unsigned long flags; printk(KERN_DEBUG "init IO_APIC IRQs\n"); @@ -1194,12 +1229,21 @@ void __init setup_IO_APIC_irqs(void) entry.trigger = irq_trigger(idx); entry.polarity = irq_polarity(idx); + bus = mp_irqs[idx].mpc_srcbus; + node = mp_bus_id_to_node[bus]; + if (irq_trigger(idx)) { entry.trigger = 1; entry.mask = 1; } irq = pin_2_irq(idx, apic, pin); + if (irq >= NR_IRQS) { + printk("skipping irq%d on node%d/bus%d/ioapic%d out of IRQs!\n", + irq, node, bus, apic); + continue; + } + /* * skip adding the timer int on secondary nodes, which causes * a small but painful rift in the time-space continuum @@ -1213,7 +1257,10 @@ void __init setup_IO_APIC_irqs(void) continue; if (IO_APIC_IRQ(irq)) { - vector = assign_irq_vector(irq); + vector = assign_irq_vector(irq, node); + if (vector < 0) + continue; + entry.vector = vector; if (IO_APIC_irq_trigger(irq)) @@ -1221,11 +1268,15 @@ void __init setup_IO_APIC_irqs(void) else irq_desc[irq].handler = &ioapic_edge_irq_type; - set_intr_gate(vector, interrupt[irq]); - + Dprintk("irq_setup: node%d/bus%d/ioapic%d/vector0x%x - irq%d %p\n", + node, bus, apic, vector, irq, interrupt[irq]); + + node_set_intr_gate(node, vector, interrupt[irq]); + if (!apic && (irq < 16)) disable_8259A_irq(irq); } + spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); @@ -1938,6 +1989,7 @@ static inline void init_IO_APIC_traps(vo * so default to an old-fashioned 8259 * interrupt if we can.. 
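
assign_irq_vector() above advances each node's vector cursor in steps of eight, skips the syscall vector, and rotates through a small per-node offset once it runs past the last device vector, so successive allocations are spread across the IDT rather than packed together. A standalone simulation of that walk, with stand-in values for the irq_vectors.h constants (illustrative only; the real allocator also tracks vector_allocated[] per node):

	#include <stdio.h>

	#define FIRST_DEVICE_VECTOR	0x31	/* stand-in values */
	#define FIRST_SYSTEM_VECTOR	0xef
	#define SYSCALL_VECTOR		0x80

	int main(void)
	{
		int vector = FIRST_DEVICE_VECTOR, offset = 0, i;

		for (i = 0; i < 32; i++) {
	next:
			vector += 8;
			if (vector == SYSCALL_VECTOR)
				goto next;
			if (vector > FIRST_SYSTEM_VECTOR) {
				offset = (offset + 1) & 7;
				vector = FIRST_DEVICE_VECTOR + offset;
			}
			printf("allocation %2d -> vector 0x%02x\n", i, vector);
		}
		return 0;
	}
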
*/ + printk(KERN_DEBUG "irq%d not serviced by IOAPIC\n", irq); if (irq < 16) make_8259A_irq(irq); else @@ -2075,9 +2127,10 @@ static inline void check_timer(void) * get/set the timer IRQ vector: */ disable_8259A_irq(0); - vector = assign_irq_vector(0); + vector = assign_irq_vector(0, cpu_to_node(smp_processor_id())); + /* This gets reserved on all nodes as FIRST_DEVICE_VECTOR */ set_intr_gate(vector, interrupt[0]); - + /* * Subtle, code in do_timer_interrupt() expects an AEOI * mode for the 8259A whenever interrupts are routed @@ -2332,10 +2385,13 @@ int io_apic_set_pci_routing (int ioapic, { struct IO_APIC_route_entry entry; unsigned long flags; + int node, bus, vector; + + if (irq >= NR_IRQS) + return -ENOSPC; if (!IO_APIC_IRQ(irq)) { - printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0/n", - ioapic); + printk(KERN_ERR "ioapic%d invalid reference to IRQ0/n", ioapic); return -EINVAL; } @@ -2355,17 +2411,26 @@ int io_apic_set_pci_routing (int ioapic, entry.polarity = 1; /* Low active */ add_pin_to_irq(irq, ioapic, pin); + + /* XXX verify this with an x440 and plain ACPI/SMP -zwane */ + bus = mp_irqs[pin].mpc_srcbus; + node = mp_bus_id_to_node[bus]; + + vector = assign_irq_vector(irq, node); + if (vector < 0) + return -ENOSPC; - entry.vector = assign_irq_vector(irq); - - printk(KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " - "IRQ %d)\n", ioapic, + entry.vector = vector; + printk(KERN_DEBUG "NODE[%d] IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " + "IRQ %d)\n", node, ioapic, mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq); irq_desc[irq].handler = &ioapic_level_irq_type; - set_intr_gate(entry.vector, interrupt[irq]); - + printk(KERN_DEBUG "irq_route: node%d/bus%d/ioapic%d/vector0x%x - irq%d %p\n", + node, bus, ioapic, entry.vector, irq, interrupt[irq]); + node_set_intr_gate(node, entry.vector, interrupt[irq]); + if (!ioapic && (irq < 16)) disable_8259A_irq(irq); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/irq.c 901-mjb1.1/arch/i386/kernel/irq.c --- 000-virgin/arch/i386/kernel/irq.c Tue Aug 5 19:59:12 2003 +++ 901-mjb1.1/arch/i386/kernel/irq.c Wed Aug 13 20:29:22 2003 @@ -961,8 +961,9 @@ static int irq_affinity_write_proc (stru return -EINVAL; irq_affinity[irq] = new_value; +#ifndef CONFIG_X86_SUMMIT irq_desc[irq].handler->set_affinity(irq, new_value); - +#endif return full_count; } diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/nmi.c 901-mjb1.1/arch/i386/kernel/nmi.c --- 000-virgin/arch/i386/kernel/nmi.c Wed Aug 13 20:24:18 2003 +++ 901-mjb1.1/arch/i386/kernel/nmi.c Wed Aug 13 20:29:29 2003 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -44,6 +45,20 @@ extern void show_registers(struct pt_reg */ static int nmi_active; +#ifdef CONFIG_X86_REMOTE_DEBUG +extern gdb_debug_hook * linux_debug_hook; +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ +{ \ + if (linux_debug_hook != (gdb_debug_hook *) NULL && !user_mode(regs)) \ + { \ + (*linux_debug_hook)(trapnr, signr, error_code, regs); \ + after; \ + } \ +} +#else +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) +#endif + #define K7_EVNTSEL_ENABLE (1 << 22) #define K7_EVNTSEL_INT (1 << 20) #define K7_EVNTSEL_OS (1 << 17) @@ -422,12 +437,59 @@ void nmi_watchdog_tick (struct pt_regs * sum = irq_stat[cpu].apic_timer_irqs; if (last_irq_sums[cpu] == sum) { +#ifdef CONFIG_X86_REMOTE_DEBUG +#ifdef CONFIG_SMP + if (spin_is_locked(&kgdb_spinlock)) +#else + if (kgdb_spinlock) +#endif + { + /* We are inside kgdb, this 
isn't a stuck cpu */ + alert_counter[cpu] = 0; + } else { +#ifdef CONFIG_SMP + if (spin_is_locked(&kgdb_nmispinlock)) +#else + if (kgdb_nmispinlock) +#endif + { + if (!procindebug[cpu]) { + procindebug[cpu] = 1; + current->thread.kgdbregs = regs; + while (1) { + /* nothing */ + } + } + return; + } + } +#endif /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; if (alert_counter[cpu] == 5*nmi_hz) { +#ifdef CONFIG_X86_REMOTE_DEBUG +#ifdef CONFIG_SMP + if (spin_trylock(&kgdb_nmispinlock)) +#else + kgdb_nmispinlock = 1; +#endif + { + procindebug[cpu] = 1; + CHK_REMOTE_DEBUG(2,SIGBUS,0,regs,) + } +#ifdef CONFIG_SMP + else { + procindebug[cpu] = 1; + current->thread.kgdbregs = regs; + while (1) { + /* nothing */ + } + } +#endif +#endif spin_lock(&nmi_print_lock); /* * We are in trouble anyway, lets at least try diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/setup.c 901-mjb1.1/arch/i386/kernel/setup.c --- 000-virgin/arch/i386/kernel/setup.c Tue Aug 5 20:01:48 2003 +++ 901-mjb1.1/arch/i386/kernel/setup.c Wed Aug 13 20:55:51 2003 @@ -989,9 +989,6 @@ void __init setup_arch(char **cmdline_p) if (smp_found_config) get_smp_config(); #endif -#ifdef CONFIG_X86_SUMMIT - setup_summit(); -#endif register_memory(max_low_pfn); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/smp.c 901-mjb1.1/arch/i386/kernel/smp.c --- 000-virgin/arch/i386/kernel/smp.c Sat Jun 14 18:37:24 2003 +++ 901-mjb1.1/arch/i386/kernel/smp.c Wed Aug 13 20:29:29 2003 @@ -498,10 +498,17 @@ int smp_call_function (void (*func) (voi { struct call_data_struct data; int cpus = num_online_cpus()-1; + int count = 0; + int gdb; - if (!cpus) + if (cpus <= 0) return 0; + gdb = 0; + if (wait == 99) { + wait = 0; + gdb = 1; + } data.func = func; data.info = info; atomic_set(&data.started, 0); @@ -517,12 +524,27 @@ int smp_call_function (void (*func) (voi send_IPI_allbutself(CALL_FUNCTION_VECTOR); /* Wait for response */ - while (atomic_read(&data.started) != cpus) + while (atomic_read(&data.started) != cpus) { + if (gdb) { + if (count++ == 2000000) { + printk("%s: timeout\n", __FUNCTION__); + break; + } + if (count == 1000000) { + printk("looks bad\n"); + printk("cpus=%d, started=%d\n", cpus, + atomic_read(&data.started)); + } + if (count > 1000000) + udelay(1); + } barrier(); + } if (wait) while (atomic_read(&data.finished) != cpus) barrier(); + spin_unlock(&call_lock); return 0; @@ -564,9 +586,9 @@ asmlinkage void smp_reschedule_interrupt ack_APIC_irq(); } -asmlinkage void smp_call_function_interrupt(void) +asmlinkage void smp_call_function_interrupt(struct pt_regs regs) { - void (*func) (void *info) = call_data->func; + void (*func) (void *info, struct pt_regs *) = (void (*)(void *, struct pt_regs*))call_data->func; void *info = call_data->info; int wait = call_data->wait; @@ -581,7 +603,7 @@ asmlinkage void smp_call_function_interr * At this point the info structure may be out of scope unless wait==1 */ irq_enter(); - (*func)(info); + (*func)(info, ®s); irq_exit(); if (wait) { diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/smpboot.c 901-mjb1.1/arch/i386/kernel/smpboot.c --- 000-virgin/arch/i386/kernel/smpboot.c Fri May 30 19:01:59 2003 +++ 901-mjb1.1/arch/i386/kernel/smpboot.c Wed Aug 13 20:48:49 2003 @@ -45,6 +45,7 @@ #include #include +#include #include #include #include @@ -62,7 +63,7 @@ int smp_num_siblings = 1; int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ /* Bitmask of currently online 
CPUs */ -unsigned long cpu_online_map; +unsigned long cpu_online_map = 1; static volatile unsigned long cpu_callin_map; volatile unsigned long cpu_callout_map; @@ -442,6 +443,7 @@ int __init start_secondary(void *unused) */ cpu_init(); smp_callin(); + setup_cpu_idt(); while (!test_bit(smp_processor_id(), &smp_commenced_mask)) rep_nop(); setup_secondary_APIC_clock(); @@ -949,7 +951,7 @@ static void __init smp_boot_cpus(unsigne current_thread_info()->cpu = 0; smp_tune_scheduling(); - + /* * If we couldn't find an SMP configuration at boot time, * get out of here now! diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/summit.c 901-mjb1.1/arch/i386/kernel/summit.c --- 000-virgin/arch/i386/kernel/summit.c Sat Jun 14 18:37:24 2003 +++ 901-mjb1.1/arch/i386/kernel/summit.c Wed Aug 13 20:55:51 2003 @@ -31,6 +31,7 @@ #include #include +#ifdef CONFIG_NUMA static void __init setup_pci_node_map_for_wpeg(int wpeg_num, struct rio_table_hdr *rth, struct scal_detail **scal_nodes, struct rio_detail **rio_nodes){ int twst_num = 0, node = 0, first_bus = 0; @@ -93,15 +94,21 @@ static void __init setup_pci_node_map_fo mp_bus_id_to_node[bus] = node; } -static void __init build_detail_arrays(struct rio_table_hdr *rth, +static int __init build_detail_arrays(struct rio_table_hdr *rth, struct scal_detail **sd, struct rio_detail **rd){ unsigned long ptr; int i, scal_detail_size, rio_detail_size; + if ((rth->num_scal_dev > MAX_NUMNODES) || + (rth->num_rio_dev > MAX_NUMNODES * 2)){ + printk("%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __FUNCTION__, MAX_NUMNODES, rth->num_scal_dev); + return 1; + } + switch (rth->version){ default: printk("%s: Bad Rio Grande Table Version: %d\n", __FUNCTION__, rth->version); - /* Fall through to default to version 2 spec */ + return 1; case 2: scal_detail_size = 11; rio_detail_size = 13; @@ -119,6 +126,8 @@ static void __init build_detail_arrays(s ptr += scal_detail_size * rth->num_scal_dev; for(i = 0; i < rth->num_rio_dev; i++) rd[i] = (struct rio_detail *)(ptr + (rio_detail_size * i)); + + return 0; } void __init setup_summit(void) @@ -152,11 +161,12 @@ void __init setup_summit(void) return; } - /* Deal with the ugly version 2/3 pointer arithmetic */ - build_detail_arrays(rio_table_hdr, scal_devs, rio_devs); + if (build_detail_arrays(rio_table_hdr, scal_devs, rio_devs)) + return; for(i = 0; i < rio_table_hdr->num_rio_dev; i++) if (is_WPEG(rio_devs[i]->type)) /* It's a Winnipeg, it's got PCI Busses */ setup_pci_node_map_for_wpeg(i, rio_table_hdr, scal_devs, rio_devs); } +#endif /* CONFIG_NUMA */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/kernel/traps.c 901-mjb1.1/arch/i386/kernel/traps.c --- 000-virgin/arch/i386/kernel/traps.c Wed Jul 2 21:59:04 2003 +++ 901-mjb1.1/arch/i386/kernel/traps.c Wed Aug 13 20:48:49 2003 @@ -30,6 +30,7 @@ #include #endif +#include #ifdef CONFIG_MCA #include #endif @@ -53,6 +54,24 @@ #include "mach_traps.h" +#ifdef CONFIG_X86_REMOTE_DEBUG +#include +#endif + +#ifdef CONFIG_X86_REMOTE_DEBUG +gdb_debug_hook * linux_debug_hook; +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ + { \ + if (linux_debug_hook != (gdb_debug_hook *) NULL && !user_mode(regs)) \ + { \ + (*linux_debug_hook)(trapnr, signr, error_code, regs) ; \ + after; \ + } \ + } +#else +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) +#endif + asmlinkage int system_call(void); asmlinkage void lcall7(void); asmlinkage void lcall27(void); @@ -68,7 +87,9 @@ char ignore_fpu_irq = 0; * F0 0F bug workaround.. 
We have a special link segment * for this. */ -struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; + +struct desc_struct node_idt_table[MAX_NUMNODES][IDT_ENTRIES] __attribute__((__section__(".data.idt"))) = + {[0 ... MAX_NUMNODES-1] = { {0, 0}, }}; asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -258,6 +279,7 @@ void die(const char * str, struct pt_reg bust_spinlocks(1); handle_BUG(regs); printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + CHK_REMOTE_DEBUG(1,SIGTRAP,err,regs,); show_registers(regs); bust_spinlocks(0); spin_unlock_irq(&die_lock); @@ -327,6 +349,7 @@ static inline void do_trap(int trapnr, i #define DO_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,)\ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } @@ -344,7 +367,9 @@ asmlinkage void do_##name(struct pt_regs #define DO_VM86_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,return)\ do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ + return; \ } #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ @@ -387,8 +412,10 @@ gp_in_vm86: return; gp_in_kernel: - if (!fixup_exception(regs)) + if (!fixup_exception(regs)) { + CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,) die("general protection fault", regs, error_code); + } } static void mem_parity_error(unsigned char reason, struct pt_regs * regs) @@ -550,8 +577,10 @@ asmlinkage void do_debug(struct pt_regs * allowing programs to debug themselves without the ptrace() * interface. */ +#ifndef CONFIG_X86_REMOTE_DEBUG if ((regs->xcs & 3) == 0) goto clear_TF_reenable; +#endif if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) goto clear_TF; } @@ -563,11 +592,13 @@ asmlinkage void do_debug(struct pt_regs info.si_errno = 0; info.si_code = TRAP_BRKPT; - /* If this is a kernel mode trap, save the user PC on entry to - * the kernel, that's what the debugger can make sense of. - */ - info.si_addr = ((regs->xcs & 3) == 0) ? (void *)tsk->thread.eip : - (void *)regs->eip; + + /* If this is a kernel mode trap, we need to reset db7 to allow us + * to continue sanely */ + if ((regs->xcs & 3) == 0) + goto clear_dr7; + + info.si_addr = (void *)regs->eip; force_sig_info(SIGTRAP, &info, tsk); /* Disable additional traps. They'll be re-enabled when @@ -577,13 +608,16 @@ clear_dr7: __asm__("movl %0,%%db7" : /* no output */ : "r" (0)); + CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,) return; debug_vm86: handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); return; +#ifndef CONFIG_X86_REMOTE_DEBUG clear_TF_reenable: +#endif set_tsk_thread_flag(tsk, TIF_SINGLESTEP); clear_TF: regs->eflags &= ~TF_MASK; @@ -773,14 +807,16 @@ asmlinkage void math_emulate(long arg) #ifdef CONFIG_X86_F00F_BUG void __init trap_init_f00f_bug(void) { - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + int node = cpu_to_node(smp_processor_id()); + + __set_fixmap(FIX_F00F_IDT, __pa(&node_idt_table[node]), PAGE_KERNEL_RO); /* * Update the IDT descriptor and reload the IDT so that * it uses the read-only mapped virtual address. 
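
The per-node descriptor array reloaded just below, node_idt_descr, is laid out in the head.S hunk earlier in this patch as one 8-byte slot per node: a 16-bit limit (IDT_ENTRIES*8-1) followed by the 32-bit base of that node's node_idt_table. A rough C-side sketch of that layout and of what the setup_cpu_idt() call added to start_secondary() presumably does -- the structure tag and the function body are assumptions, only the .address member and the lidt reload pattern are shown by the patch:

	struct node_idt_desc {
		unsigned short size;		/* IDT_ENTRIES * 8 - 1          */
		unsigned long  address;		/* base of node_idt_table[node] */
	} __attribute__((packed));

	extern struct node_idt_desc node_idt_descr[];	/* laid out in head.S */

	/* illustrative guess at the per-CPU switch to the node's IDT */
	static void setup_cpu_idt(void)
	{
		int node = cpu_to_node(smp_processor_id());

		__asm__ __volatile__("lidt %0" : : "m" (node_idt_descr[node]));
	}
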
*/ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - __asm__ __volatile__("lidt %0": "=m" (idt_descr)); + node_idt_descr[node].address = fix_to_virt(FIX_F00F_IDT); + __asm__ __volatile__("lidt %0": "=m" (node_idt_descr[node])); } #endif @@ -799,24 +835,36 @@ do { \ /* - * This needs to use 'idt_table' rather than 'idt', and + * This needs to use 'node_idt_table' rather than 'idt', and * thus use the _nonmapped_ version of the IDT, as the * Pentium F0 0F bugfix can have resulted in the mapped * IDT being write-protected. */ + +void node_set_intr_gate(unsigned int node, unsigned int n, void *addr) +{ + _set_gate(&node_idt_table[node][n],14,0,addr,__KERNEL_CS); +} + void set_intr_gate(unsigned int n, void *addr) { - _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); + int node; + for (node = 0; node < MAX_NUMNODES; node++) + node_set_intr_gate(node, n, addr); } static void __init set_trap_gate(unsigned int n, void *addr) { - _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); + int node; + for (node = 0; node < MAX_NUMNODES; node++) + _set_gate(&node_idt_table[node][n],15,0,addr,__KERNEL_CS); } static void __init set_system_gate(unsigned int n, void *addr) { - _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); + int node; + for (node = 0; node < MAX_NUMNODES; node++) + _set_gate(&node_idt_table[node][n],15,3,addr,__KERNEL_CS); } static void __init set_call_gate(void *a, void *addr) @@ -826,7 +874,9 @@ static void __init set_call_gate(void *a static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) { - _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); + int node; + for (node = 0; node < MAX_NUMNODES; node++) + _set_gate(&node_idt_table[node][n],5,0,0,(gdt_entry<<3)); } @@ -877,6 +927,9 @@ void __init trap_init(void) */ set_call_gate(&default_ldt[0],lcall7); set_call_gate(&default_ldt[4],lcall27); + + /* setup the pernode idt tables */ + setup_node_idts(); /* * Should be a barrier for any external CPU state. diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/lib/dec_and_lock.c 901-mjb1.1/arch/i386/lib/dec_and_lock.c --- 000-virgin/arch/i386/lib/dec_and_lock.c Sun Nov 17 20:29:28 2002 +++ 901-mjb1.1/arch/i386/lib/dec_and_lock.c Wed Aug 13 20:29:36 2003 @@ -10,6 +10,7 @@ #include #include +#ifndef ATOMIC_DEC_AND_LOCK int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) { int counter; @@ -38,3 +39,5 @@ slow_path: spin_unlock(lock); return 0; } +#endif + diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/mm/fault.c 901-mjb1.1/arch/i386/mm/fault.c --- 000-virgin/arch/i386/mm/fault.c Fri May 30 19:01:59 2003 +++ 901-mjb1.1/arch/i386/mm/fault.c Wed Aug 13 20:48:49 2003 @@ -2,6 +2,11 @@ * linux/arch/i386/mm/fault.c * * Copyright (C) 1995 Linus Torvalds + * + * Change History + * + * Tigran Aivazian Remote debugging support. 
+ * */ #include @@ -20,6 +25,9 @@ #include #include /* For unblank_screen() */ #include +#ifdef CONFIG_X86_REMOTE_DEBUG +#include +#endif #include #include @@ -112,6 +120,15 @@ asmlinkage void do_page_fault(struct pt_ if (in_atomic() || !mm) goto no_context; +#ifdef CONFIG_X86_REMOTE_DEBUG + if (kgdb_memerr_expected) { + if (linux_debug_hook != (gdb_debug_hook *) NULL) { + (*linux_debug_hook)(14, SIGSEGV, error_code, regs) ; + return; /* return w/modified regs */ + } + } +#endif + down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -211,14 +228,27 @@ bad_area: return; } +#ifdef CONFIG_X86_REMOTE_DEBUG + if (kgdb_memerr_expected) { + if (linux_debug_hook != (gdb_debug_hook *) NULL) { + (*linux_debug_hook)(14, SIGSEGV, error_code, regs); + return; /* Return with modified registers */ + } + } else { + if (linux_debug_hook != (gdb_debug_hook *) NULL) { + (*linux_debug_hook)(14, SIGSEGV, error_code, regs); + } + } +#endif + #ifdef CONFIG_X86_F00F_BUG /* * Pentium F0 0F C7 C8 bug workaround. */ if (boot_cpu_data.f00f_bug) { - unsigned long nr; - - nr = (address - idt_descr.address) >> 3; + unsigned long nr, node; + node = cpu_to_node(smp_processor_id()); + nr = (address - node_idt_descr[node].address) >> 3; if (nr == 6) { do_invalid_op(regs, 0); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/mm/hugetlbpage.c 901-mjb1.1/arch/i386/mm/hugetlbpage.c --- 000-virgin/arch/i386/mm/hugetlbpage.c Tue Jun 24 21:29:16 2003 +++ 901-mjb1.1/arch/i386/mm/hugetlbpage.c Wed Aug 13 20:51:52 2003 @@ -61,6 +61,27 @@ static struct page *alloc_fresh_huge_pag void free_huge_page(struct page *page); +#ifdef CONFIG_NUMA + +static inline void huge_inc_rss(struct mm_struct *mm, struct page *page) +{ + mm->rss += (HPAGE_SIZE / PAGE_SIZE); + mm->pernode_rss[page_to_nid(page)] += (HPAGE_SIZE / PAGE_SIZE); +} + +static inline void huge_dec_rss(struct mm_struct *mm, struct page *page) +{ + mm->rss -= (HPAGE_SIZE / PAGE_SIZE); + mm->pernode_rss[page_to_nid(page)] -= (HPAGE_SIZE / PAGE_SIZE); +} + +#else /* !CONFIG_NUMA */ + +#define huge_inc_rss(mm, page) ((mm)->rss += (HPAGE_SIZE / PAGE_SIZE)) +#define huge_dec_rss(mm, page) ((mm)->rss -= (HPAGE_SIZE / PAGE_SIZE)) + +#endif /* CONFIG_NUMA */ + static struct page *alloc_hugetlb_page(void) { int i; @@ -105,7 +126,7 @@ static void set_huge_pte(struct mm_struc { pte_t entry; - mm->rss += (HPAGE_SIZE / PAGE_SIZE); + huge_inc_rss(mm, page); if (write_access) { entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); @@ -145,7 +166,7 @@ int copy_hugetlb_page_range(struct mm_st ptepage = pte_page(entry); get_page(ptepage); set_pte(dst_pte, entry); - dst->rss += (HPAGE_SIZE / PAGE_SIZE); + huge_inc_rss(dst, ptepage); addr += HPAGE_SIZE; } return 0; @@ -314,8 +335,8 @@ void unmap_hugepage_range(struct vm_area page = pte_page(*pte); huge_page_release(page); pte_clear(pte); + huge_dec_rss(mm, page); } - mm->rss -= (end - start) >> PAGE_SHIFT; flush_tlb_range(vma, start, end); } diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/mm/init.c 901-mjb1.1/arch/i386/mm/init.c --- 000-virgin/arch/i386/mm/init.c Wed Aug 13 20:24:18 2003 +++ 901-mjb1.1/arch/i386/mm/init.c Wed Aug 13 20:51:03 2003 @@ -121,6 +121,24 @@ static void __init page_table_range_init } } + +/* + * Abstract out using large pages when mapping KVA, or the SMP identity + * mapping + */ +void pmd_map_pfn_range(pmd_t* pmd_entry, unsigned long pfn, unsigned long max_pfn) +{ + int pte_ofs; + /* Map with big pages if possible, otherwise create normal page tables. 
*/ + if (cpu_has_pse) { + set_pmd(pmd_entry, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); + pfn += PTRS_PER_PTE; + } else { + pte_t* pte = one_page_table_init(pmd_entry); + for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_pfn; pte++, pfn++, pte_ofs++) + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); + } +} /* * This maps the physical memory to kernel virtual address space, a total * of max_low_pfn pages, by creating page tables starting from address @@ -131,8 +149,7 @@ static void __init kernel_physical_mappi unsigned long pfn; pgd_t *pgd; pmd_t *pmd; - pte_t *pte; - int pgd_idx, pmd_idx, pte_ofs; + int pgd_idx, pmd_idx; pgd_idx = pgd_index(PAGE_OFFSET); pgd = pgd_base + pgd_idx; @@ -142,21 +159,48 @@ static void __init kernel_physical_mappi pmd = one_md_table_init(pgd); if (pfn >= max_low_pfn) continue; - for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { - /* Map with big pages if possible, otherwise create normal page tables. */ - if (cpu_has_pse) { - set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); - pfn += PTRS_PER_PTE; - } else { - pte = one_page_table_init(pmd); - - for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); - } + + /* beware of starting KVA in the middle of a pmd. */ + if( pgd_idx == pgd_index(PAGE_OFFSET) ) { + pmd_idx = pmd_index(PAGE_OFFSET); + pmd = &pmd[pmd_idx]; + } else + pmd_idx = 0; + + for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { + pmd_map_pfn_range(pmd, pfn, max_low_pfn); + pfn += PTRS_PER_PTE; } } } +/* + * Add low memory identity-mappings - SMP needs it when + * starting up on an AP from real-mode. In the non-PAE + * case we already have these mappings through head.S. + * All user-space mappings are explicitly cleared after + * SMP startup in zap_low_mappings(). + */ +static void __init low_physical_mapping_init(pgd_t *pgd_base) +{ +#if CONFIG_X86_PAE + unsigned long pfn = 0; + int pmd_ofs = 0; + pmd_t *pmd = one_md_table_init(pgd_base); + + if(!cpu_has_pse) { + printk("PAE enabled, but no support for PSE (large pages)!\n"); + printk("this is likely to waste some RAM."); + } + + for (; pmd_ofs < PTRS_PER_PMD && pfn <= max_low_pfn; pmd++, pmd_ofs++) { + pmd_map_pfn_range(pmd, pfn, max_low_pfn); + pfn += PTRS_PER_PTE; + } +#endif +} + + static inline int page_kills_ppro(unsigned long pagenr) { if (pagenr >= 0x70000 && pagenr <= 0x7003F) @@ -217,7 +261,7 @@ void __init permanent_kmaps_init(pgd_t * pgd = swapper_pg_dir + pgd_index(vaddr); pmd = pmd_offset(pgd, vaddr); pte = pte_offset_kernel(pmd, vaddr); - pkmap_page_table = pte; + pkmap_page_table = pte; } void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) @@ -282,6 +326,7 @@ static void __init pagetable_init (void) } kernel_physical_mapping_init(pgd_base); + low_physical_mapping_init(pgd_base); remap_numa_kva(); /* @@ -290,19 +335,7 @@ static void __init pagetable_init (void) */ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; page_table_range_init(vaddr, 0, pgd_base); - permanent_kmaps_init(pgd_base); - -#ifdef CONFIG_X86_PAE - /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. 
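
Whether or not PSE is available, pmd_map_pfn_range() above consumes exactly PTRS_PER_PTE page frames per pmd slot (one large page, or one fully populated page table), which is why its callers advance pfn by PTRS_PER_PTE after each call. A quick back-of-the-envelope check of what one pmd entry covers, assuming 4K pages (PTRS_PER_PTE is 1024 without PAE, 512 with PAE):

	#include <stdio.h>

	int main(void)
	{
		unsigned long page_size = 4096;
		unsigned long ptrs_per_pte[] = { 1024, 512 };	/* !PAE, PAE */
		int i;

		for (i = 0; i < 2; i++)
			printf("%4s: one pmd entry maps %lu MB\n",
			       i ? "PAE" : "!PAE",
			       ptrs_per_pte[i] * page_size >> 20);
		return 0;
	}
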
- */ - pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; -#endif } void zap_low_mappings (void) @@ -314,7 +347,7 @@ void zap_low_mappings (void) * Note that "pgd_clear()" doesn't do it for * us, because pgd_clear() is a no-op on i386. */ - for (i = 0; i < USER_PTRS_PER_PGD; i++) + for (i = 0; i < FIRST_KERNEL_PGD_PTR; i++) #ifdef CONFIG_X86_PAE set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); #else @@ -511,6 +544,7 @@ void __init mem_init(void) kmem_cache_t *pgd_cache; kmem_cache_t *pmd_cache; +kmem_cache_t *kernel_pmd_cache; void __init pgtable_cache_init(void) { @@ -523,6 +557,15 @@ void __init pgtable_cache_init(void) NULL); if (!pmd_cache) panic("pgtable_cache_init(): cannot create pmd cache"); + + kernel_pmd_cache = kmem_cache_create("pae_kernel_pmd", + (PTRS_PER_PMD*sizeof(pmd_t))*KERNEL_PGD_PTRS, + 0, + SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, + kernel_pmd_ctor, + NULL); + if (!kernel_pmd_cache) + panic("pgtable_cache_init(): cannot create kernel pmd cache"); } pgd_cache = kmem_cache_create("pgd", PTRS_PER_PGD*sizeof(pgd_t), diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/mm/pgtable.c 901-mjb1.1/arch/i386/mm/pgtable.c --- 000-virgin/arch/i386/mm/pgtable.c Wed Aug 13 20:24:18 2003 +++ 901-mjb1.1/arch/i386/mm/pgtable.c Wed Aug 13 20:51:03 2003 @@ -157,6 +157,28 @@ void pmd_ctor(void *pmd, kmem_cache_t *c memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); } +void kernel_pmd_ctor(void *__pmd, kmem_cache_t *kernel_pmd_cache, unsigned long flags) +{ + pmd_t *pmd = __pmd; + int i; + + /* + * you only need to memset the portion which isn't used by + * the kernel + */ + clear_page(__pmd); + + for (i=FIRST_KERNEL_PGD_PTR; i= 0; i--) - kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); + for (i--; i >= 0; i--) { + pmd_t *pmd = pmd_offset(&pgd[i],0); + kmem_cache_free(pmd_cache, pmd); + } kmem_cache_free(pgd_cache, pgd); return NULL; } @@ -231,9 +264,18 @@ void pgd_free(pgd_t *pgd) int i; /* in the PAE case user pgd entries are overwritten before usage */ - if (PTRS_PER_PMD > 1) - for (i = 0; i < USER_PTRS_PER_PGD; ++i) - kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); + if (PTRS_PER_PMD > 1) { + for (i = 0; i < PTRS_PER_PGD; i++) { + pmd_t *pmd_to_free = pmd_offset(&pgd[i],0); + + set_pgd(&pgd[i], __pgd(0)); + + if (i < FIRST_KERNEL_PGD_PTR) + kmem_cache_free(pmd_cache, pmd_to_free); + else if (i == FIRST_KERNEL_PGD_PTR) + kmem_cache_free(kernel_pmd_cache, pmd_to_free); + } + } /* in the non-PAE case, clear_page_tables() clears user pgd entries */ kmem_cache_free(pgd_cache, pgd); } diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/i386/vmlinux.lds.S 901-mjb1.1/arch/i386/vmlinux.lds.S --- 000-virgin/arch/i386/vmlinux.lds.S Sat Jun 14 18:37:24 2003 +++ 901-mjb1.1/arch/i386/vmlinux.lds.S Wed Aug 13 20:27:43 2003 @@ -10,7 +10,7 @@ ENTRY(startup_32) jiffies = jiffies_64; SECTIONS { - . = 0xC0000000 + 0x100000; + . = __PAGE_OFFSET + 0x100000; /* read-only */ _text = .; /* Text and read-only data */ .text : { diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/ppc/Kconfig 901-mjb1.1/arch/ppc/Kconfig --- 000-virgin/arch/ppc/Kconfig Wed Aug 13 20:24:20 2003 +++ 901-mjb1.1/arch/ppc/Kconfig Wed Aug 13 20:51:56 2003 @@ -1378,6 +1378,36 @@ source "net/bluetooth/Kconfig" source "lib/Kconfig" +menu "GCOV coverage profiling" + +config GCOV_PROFILE + bool "GCOV coverage profiling" + ---help--- + Provide infrastructure for coverage support for the kernel. This + will not compile the kernel by default with the necessary flags. 
+ To obtain coverage information for the entire kernel, one should + enable the subsequent option (Profile entire kernel). If only + particular files or directories of the kernel are desired, then + one must provide the following compile options for such targets: + "-fprofile-arcs -ftest-coverage" in the CFLAGS. To obtain + access to the coverage data one must insmod the gcov-prof kernel + module. + +config GCOV_ALL + bool "GCOV_ALL" + depends on GCOV_PROFILE + ---help--- + If you say Y here, it will compile the entire kernel with coverage + option enabled. + +config GCOV_PROC + tristate "gcov-proc module" + depends on GCOV_PROFILE && PROC_FS + ---help--- + This is the gcov-proc module that exposes gcov data through the + /proc filesystem + +endmenu menu "Kernel hacking" diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/ppc/boot/openfirmware/common.c 901-mjb1.1/arch/ppc/boot/openfirmware/common.c --- 000-virgin/arch/ppc/boot/openfirmware/common.c Sun Nov 17 20:29:52 2002 +++ 901-mjb1.1/arch/ppc/boot/openfirmware/common.c Wed Aug 13 20:51:56 2003 @@ -30,6 +30,10 @@ struct memchunk { static struct memchunk *freechunks; +#ifdef CONFIG_GCOV_PROFILE +void __bb_init_func (void *ptr /* struct bb *blocks */) { } +#endif + static void *zalloc(void *x, unsigned items, unsigned size) { void *p; diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/ppc/boot/prep/misc.c 901-mjb1.1/arch/ppc/boot/prep/misc.c --- 000-virgin/arch/ppc/boot/prep/misc.c Thu Jan 9 19:15:57 2003 +++ 901-mjb1.1/arch/ppc/boot/prep/misc.c Wed Aug 13 20:51:56 2003 @@ -71,6 +71,10 @@ extern unsigned long serial_init(int cha extern void serial_fixups(void); extern unsigned long get_mem_size(void); +#ifdef CONFIG_GCOV_PROFILE +void __bb_init_func (void *ptr /* struct bb *blocks */) { } +#endif + void writel(unsigned int val, unsigned int address) { diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/ppc/kernel/Makefile 901-mjb1.1/arch/ppc/kernel/Makefile --- 000-virgin/arch/ppc/kernel/Makefile Fri May 30 19:02:00 2003 +++ 901-mjb1.1/arch/ppc/kernel/Makefile Wed Aug 13 20:51:56 2003 @@ -15,8 +15,8 @@ extra-$(CONFIG_40x) := head_4xx.o extra-$(CONFIG_8xx) := head_8xx.o extra-$(CONFIG_6xx) += idle_6xx.o -obj-y := entry.o traps.o irq.o idle.o time.o misc.o \ - process.o signal.o ptrace.o align.o \ +obj-y := entry.o ptrace.o traps.o irq.o idle.o time.o misc.o \ + process.o signal.o align.o \ semaphore.o syscalls.o setup.o \ cputable.o ppc_htab.o obj-$(CONFIG_6xx) += l2cr.o cpu_setup_6xx.o diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/ppc/kernel/entry.S 901-mjb1.1/arch/ppc/kernel/entry.S --- 000-virgin/arch/ppc/kernel/entry.S Wed Jul 2 21:59:07 2003 +++ 901-mjb1.1/arch/ppc/kernel/entry.S Wed Aug 13 20:51:56 2003 @@ -106,10 +106,26 @@ transfer_to_handler: mfspr r11,SPRN_HID0 mtcr r11 BEGIN_FTR_SECTION +#ifdef CONFIG_GCOV_PROFILE + bt- 8,near1_power_save_6xx_restore /* Check DOZE */ + b skip1_power_save_6xx_restore +near1_power_save_6xx_restore: + b power_save_6xx_restore +skip1_power_save_6xx_restore: +#else bt- 8,power_save_6xx_restore /* Check DOZE */ +#endif END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) BEGIN_FTR_SECTION +#ifdef CONFIG_GCOV_PROFILE + bt- 9,near2_power_save_6xx_restore /* Check NAP */ + b skip2_power_save_6xx_restore +near2_power_save_6xx_restore: + b power_save_6xx_restore +skip2_power_save_6xx_restore: +#else bt- 9,power_save_6xx_restore /* Check NAP */ +#endif END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #endif /* CONFIG_6xx */ .globl transfer_to_handler_cont diff -urpN -X /home/fletch/.diff.exclude 
000-virgin/arch/ppc/kernel/head.S 901-mjb1.1/arch/ppc/kernel/head.S --- 000-virgin/arch/ppc/kernel/head.S Sat Jun 14 18:37:25 2003 +++ 901-mjb1.1/arch/ppc/kernel/head.S Wed Aug 13 20:51:56 2003 @@ -1643,3 +1643,25 @@ intercept_table: */ abatron_pteptrs: .space 8 + +#ifdef CONFIG_GCOV_PROFILE +/* + * The .ctors-section contains a list of pointers to constructor + * functions which are used to initialize gcov structures. + * + * Because there is no NULL at the end of the constructor list + * in the kernel we need the addresses of both the constructor + * as well as the destructor list which are supposed to be + * adjacent. + */ + +.section ".ctors","aw" +.globl __CTOR_LIST__ +.type __CTOR_LIST__,@object +__CTOR_LIST__: +.section ".dtors","aw" +.globl __DTOR_LIST__ +.type __DTOR_LIST__,@object +__DTOR_LIST__: +#endif + diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/ppc/syslib/prom_init.c 901-mjb1.1/arch/ppc/syslib/prom_init.c --- 000-virgin/arch/ppc/syslib/prom_init.c Tue Aug 5 20:01:48 2003 +++ 901-mjb1.1/arch/ppc/syslib/prom_init.c Wed Aug 13 20:51:56 2003 @@ -667,7 +667,11 @@ prom_instantiate_rtas(void) * Actually OF has bugs so we just arbitrarily * use memory at the 6MB point. */ +#ifdef CONFIG_GCOV_PROFILE + rtas_data = 0x990000; +#else rtas_data = 6 << 20; +#endif prom_print(" at "); prom_print_hex(rtas_data); } diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/ppc64/Kconfig 901-mjb1.1/arch/ppc64/Kconfig --- 000-virgin/arch/ppc64/Kconfig Wed Aug 13 20:24:20 2003 +++ 901-mjb1.1/arch/ppc64/Kconfig Wed Aug 13 20:51:56 2003 @@ -342,6 +342,37 @@ config VIOPATH source "arch/ppc64/oprofile/Kconfig" +menu "GCOV coverage profiling" + +config GCOV_PROFILE + bool "GCOV coverage profiling" + ---help--- + Provide infrastructure for coverage support for the kernel. This + will not compile the kernel by default with the necessary flags. + To obtain coverage information for the entire kernel, one should + enable the subsequent option (Profile entire kernel). If only + particular files or directories of the kernel are desired, then + one must provide the following compile options for such targets: + "-fprofile-arcs -ftest-coverage" in the CFLAGS. To obtain + access to the coverage data one must insmod the gcov-prof kernel + module. + +config GCOV_ALL + bool "GCOV_ALL" + depends on GCOV_PROFILE + ---help--- + If you say Y here, it will compile the entire kernel with coverage + option enabled. + +config GCOV_PROC + tristate "gcov-proc module" + depends on GCOV_PROFILE && PROC_FS + ---help--- + This is the gcov-proc module that exposes gcov data through the + /proc filesystem + +endmenu + menu "Kernel hacking" config DEBUG_KERNEL diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/ppc64/kernel/head.S 901-mjb1.1/arch/ppc64/kernel/head.S --- 000-virgin/arch/ppc64/kernel/head.S Sat Jun 14 18:37:25 2003 +++ 901-mjb1.1/arch/ppc64/kernel/head.S Wed Aug 13 20:51:56 2003 @@ -2015,3 +2015,24 @@ stab_array: .globl cmd_line cmd_line: .space 512 + +#ifdef CONFIG_GCOV_PROFILE +/* + * The .ctors-section contains a list of pointers to constructor + * functions which are used to initialize gcov structures. + * + * Because there is no NULL at the end of the constructor list + * in the kernel we need the addresses of both the constructor + * as well as the destructor list which are supposed to be + * adjacent. 
+ */ + +.section ".ctors","aw" +.globl __CTOR_LIST__ +.type __CTOR_LIST__,@object +__CTOR_LIST__: +.section ".dtors","aw" +.globl __DTOR_LIST__ +.type __DTOR_LIST__,@object +__DTOR_LIST__: +#endif diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/sparc64/kernel/devices.c 901-mjb1.1/arch/sparc64/kernel/devices.c --- 000-virgin/arch/sparc64/kernel/devices.c Sat May 10 18:34:35 2003 +++ 901-mjb1.1/arch/sparc64/kernel/devices.c Wed Aug 13 20:29:36 2003 @@ -31,6 +31,8 @@ int linux_num_cpus = 0; extern void cpu_probe(void); extern void central_probe(void); +unsigned long cpu_hz; + void __init device_scan(void) { char node_str[128]; @@ -68,6 +70,8 @@ void __init device_scan(void) prom_getproperty(scan, "portid", (char *) &thismid, sizeof(thismid)); } + if (!cpu_hz) + cpu_hz = prom_getint(scan, "clock-frequency"); linux_cpus[cpu_ctr].mid = thismid; printk("Found CPU %d (node=%08x,mid=%d)\n", cpu_ctr, (unsigned) scan, thismid); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/sparc64/kernel/rtrap.S 901-mjb1.1/arch/sparc64/kernel/rtrap.S --- 000-virgin/arch/sparc64/kernel/rtrap.S Sat May 10 18:34:35 2003 +++ 901-mjb1.1/arch/sparc64/kernel/rtrap.S Wed Aug 13 20:29:41 2003 @@ -15,6 +15,10 @@ #include #include +#ifndef CONFIG_KGDB_THREAD +#define user_schedule schedule +#endif + #define RTRAP_PSTATE (PSTATE_RMO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE) #define RTRAP_PSTATE_IRQOFF (PSTATE_RMO|PSTATE_PEF|PSTATE_PRIV) #define RTRAP_PSTATE_AG_IRQOFF (PSTATE_RMO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG) @@ -33,7 +37,7 @@ __handle_softirq: ba,a,pt %xcc, __handle_softirq_continue nop __handle_preemption: - call schedule + call user_schedule wrpr %g0, RTRAP_PSTATE, %pstate ba,pt %xcc, __handle_preemption_continue wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate @@ -48,7 +52,7 @@ __handle_user_windows: be,pt %xcc, 1f nop - call schedule + call user_schedule wrpr %g0, RTRAP_PSTATE, %pstate wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate ldx [%g6 + TI_FLAGS], %l0 @@ -92,7 +96,7 @@ __handle_perfctrs: be,pt %xcc, 1f nop - call schedule + call user_schedule wrpr %g0, RTRAP_PSTATE, %pstate wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate ldx [%g6 + TI_FLAGS], %l0 @@ -273,7 +277,7 @@ to_kernel: sethi %hi(PREEMPT_ACTIVE), %l6 stw %l6, [%g6 + TI_PRE_COUNT] wrpr 0, %pil - call schedule + call user_schedule nop ba,pt %xcc, rtrap stw %g0, [%g6 + TI_PRE_COUNT] diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/sparc64/lib/rwlock.S 901-mjb1.1/arch/sparc64/lib/rwlock.S --- 000-virgin/arch/sparc64/lib/rwlock.S Sun Nov 17 20:29:44 2002 +++ 901-mjb1.1/arch/sparc64/lib/rwlock.S Wed Aug 13 20:29:36 2003 @@ -63,5 +63,33 @@ __write_lock: /* %o0 = lock_ptr */ be,pt %icc, 99b membar #StoreLoad | #StoreStore ba,a,pt %xcc, 1b + + .globl __read_trylock +__read_trylock: /* %o0 = lock_ptr */ + ldsw [%o0], %g5 + brlz,pn %g5, 100f + add %g5, 1, %g7 + cas [%o0], %g5, %g7 + cmp %g5, %g7 + bne,pn %icc, __read_trylock + membar #StoreLoad | #StoreStore + retl + mov 1, %o0 + + .globl __write_trylock +__write_trylock: /* %o0 = lock_ptr */ + sethi %hi(0x80000000), %g2 +1: lduw [%o0], %g5 +4: brnz,pn %g5, 100f + or %g5, %g2, %g7 + cas [%o0], %g5, %g7 + cmp %g5, %g7 + bne,pn %icc, 1b + membar #StoreLoad | #StoreStore + retl + mov 1, %o0 +100: retl + mov 0, %o0 + rwlock_impl_end: diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/x86_64/Kconfig 901-mjb1.1/arch/x86_64/Kconfig --- 000-virgin/arch/x86_64/Kconfig Wed Aug 13 20:24:21 2003 +++ 901-mjb1.1/arch/x86_64/Kconfig Wed Aug 13 20:51:56 2003 @@ -451,6 +451,37 @@ source "net/bluetooth/Kconfig" source 
"arch/x86_64/oprofile/Kconfig" +menu "GCOV coverage profiling" + +config GCOV_PROFILE + bool "GCOV coverage profiling" + ---help--- + Provide infrastructure for coverage support for the kernel. This + will not compile the kernel by default with the necessary flags. + To obtain coverage information for the entire kernel, one should + enable the subsequent option (Profile entire kernel). If only + particular files or directories of the kernel are desired, then + one must provide the following compile options for such targets: + "-fprofile-arcs -ftest-coverage" in the CFLAGS. To obtain + access to the coverage data one must insmod the gcov-prof kernel + module. + +config GCOV_ALL + bool "GCOV_ALL" + depends on GCOV_PROFILE + ---help--- + If you say Y here, it will compile the entire kernel with coverage + option enabled. + +config GCOV_PROC + tristate "gcov-proc module" + depends on GCOV_PROFILE && PROC_FS + ---help--- + This is the gcov-proc module that exposes gcov data through the + /proc filesystem + +endmenu + menu "Kernel hacking" config DEBUG_KERNEL diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/x86_64/kernel/entry.S 901-mjb1.1/arch/x86_64/kernel/entry.S --- 000-virgin/arch/x86_64/kernel/entry.S Fri May 30 19:02:02 2003 +++ 901-mjb1.1/arch/x86_64/kernel/entry.S Wed Aug 13 20:29:41 2003 @@ -46,6 +46,10 @@ #define PDAREF(field) %gs:field +#ifndef CONFIG_KGDB_THREAD +#define user_schedule schedule +#endif + #ifdef CONFIG_PREEMPT #define preempt_stop cli #else @@ -187,7 +191,7 @@ sysret_careful: jnc sysret_signal sti pushq %rdi - call schedule + call user_schedule popq %rdi jmp sysret_check @@ -256,7 +260,7 @@ int_careful: jnc int_very_careful sti pushq %rdi - call schedule + call user_schedule popq %rdi jmp int_with_check @@ -426,7 +430,7 @@ retint_careful: jnc retint_signal sti pushq %rdi - call schedule + call user_schedule popq %rdi GET_THREAD_INFO(%rcx) cli @@ -460,7 +464,7 @@ retint_kernel: jc retint_restore_args movl $PREEMPT_ACTIVE,threadinfo_preempt_count(%rcx) sti - call schedule + call user_schedule cli GET_THREAD_INFO(%rcx) movl $0,threadinfo_preempt_count(%rcx) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/arch/x86_64/kernel/head.S 901-mjb1.1/arch/x86_64/kernel/head.S --- 000-virgin/arch/x86_64/kernel/head.S Wed Aug 13 20:24:21 2003 +++ 901-mjb1.1/arch/x86_64/kernel/head.S Wed Aug 13 20:51:56 2003 @@ -383,3 +383,23 @@ ENTRY(idt_table) .quad 0 .endr +#ifdef CONFIG_GCOV_PROFILE +/* + * The .ctors-section contains a list of pointers to constructor + * functions which are used to initialize gcov structures. + * + * Because there is no NULL at the end of the constructor list + * in the kernel we need the addresses of both the constructor + * as well as the destructor list which are supposed to be + * adjacent. 
+ */ + +.section ".ctors","aw" +.globl __CTOR_LIST__ +.type __CTOR_LIST__,@object +__CTOR_LIST__: +.section ".dtors","aw" +.globl __DTOR_LIST__ +.type __DTOR_LIST__,@object +__DTOR_LIST__: +#endif diff -urpN -X /home/fletch/.diff.exclude 000-virgin/drivers/Makefile 901-mjb1.1/drivers/Makefile --- 000-virgin/drivers/Makefile Wed Jul 2 21:59:08 2003 +++ 901-mjb1.1/drivers/Makefile Wed Aug 13 20:51:56 2003 @@ -49,3 +49,4 @@ obj-$(CONFIG_ISDN_BOOL) += isdn/ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ +obj-$(CONFIG_GCOV_PROC) += gcov/ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/drivers/char/Makefile 901-mjb1.1/drivers/char/Makefile --- 000-virgin/drivers/char/Makefile Sat Jun 14 18:37:27 2003 +++ 901-mjb1.1/drivers/char/Makefile Wed Aug 13 20:29:29 2003 @@ -25,6 +25,7 @@ obj-$(CONFIG_COMPUTONE) += ip2.o ip2main obj-$(CONFIG_RISCOM8) += riscom8.o obj-$(CONFIG_ISI) += isicom.o obj-$(CONFIG_ESPSERIAL) += esp.o +obj-$(CONFIG_X86_REMOTE_DEBUG) += gdbserial.o obj-$(CONFIG_SYNCLINK) += synclink.o obj-$(CONFIG_SYNCLINKMP) += synclinkmp.o obj-$(CONFIG_N_HDLC) += n_hdlc.o diff -urpN -X /home/fletch/.diff.exclude 000-virgin/drivers/char/gdbserial.c 901-mjb1.1/drivers/char/gdbserial.c --- 000-virgin/drivers/char/gdbserial.c Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/drivers/char/gdbserial.c Wed Aug 13 20:29:29 2003 @@ -0,0 +1,274 @@ +/* + * Serial interface GDB stub + * + * Written (hacked together) by David Grothe (dave@gcom.com) + * + * Modified by Scott Foehner (sfoehner@engr.sgi.com) to allow connect + * on boot-up + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#undef PRNT /* define for debug printing */ + +#define GDB_BUF_SIZE 512 /* power of 2, please */ + +static char gdb_buf[GDB_BUF_SIZE]; +static int gdb_buf_in_inx; +static atomic_t gdb_buf_in_cnt; +static int gdb_buf_out_inx; + +extern void set_debug_traps(void); /* GDB routine */ +extern int gdb_serial_setup(int ttyS, int baud, int *port, int *irq); +extern void shutdown_for_gdb(struct async_struct *info); + /* in serial.c */ + +int gdb_irq; +int gdb_port; +int gdb_ttyS = 1; /* Default: ttyS1 */ +int gdb_baud = 38400; +int gdb_enter = 0; /* Default: do not do gdb_hook on boot */ +int gdb_initialized = 0; + +static int initialized = -1; + +/* + * Get a byte from the hardware data buffer and return it + */ +static int +read_data_bfr(void) +{ + if (inb(gdb_port + UART_LSR) & UART_LSR_DR) + return (inb(gdb_port + UART_RX)); + + return (-1); + +} /* read_data_bfr */ + +/* + * Get a char if available, return -1 if nothing available. + * Empty the receive buffer first, then look at the interface hardware. + */ +static int +read_char(void) +{ + if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ + int chr; + + chr = gdb_buf[gdb_buf_out_inx++]; + gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); + atomic_dec(&gdb_buf_in_cnt); + return (chr); + } + + return (read_data_bfr()); /* read from hardware */ + +} /* read_char */ + +/* + * Wait until the interface can accept a char, then write it. + */ +static void +write_char(int chr) +{ + while (!(inb(gdb_port + UART_LSR) & UART_LSR_THRE)) ; + + outb(chr, gdb_port + UART_TX); + +} /* write_char */ + +/* + * This is the receiver interrupt routine for the GDB stub. 
+ * It will receive a limited number of characters of input + * from the gdb host machine and save them up in a buffer. + * + * When the gdb stub routine getDebugChar() is called it + * draws characters out of the buffer until it is empty and + * then reads directly from the serial port. + * + * We do not attempt to write chars from the interrupt routine + * since the stubs do all of that via putDebugChar() which + * writes one byte after waiting for the interface to become + * ready. + * + * The debug stubs like to run with interrupts disabled since, + * after all, they run as a consequence of a breakpoint in + * the kernel. + * + * Perhaps someone who knows more about the tty driver than I + * care to learn can make this work for any low level serial + * driver. + */ +static irqreturn_t +gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + int chr; + int iir; + + do { + chr = read_data_bfr(); + iir = inb(gdb_port + UART_IIR); +#ifdef PRNT + printk("gdb_interrupt: chr=%02x '%c' after read iir=%02x\n", + chr, chr > ' ' && chr < 0x7F ? chr : ' ', iir); +#endif + if (chr < 0) + continue; + + if (chr == 3) { /* Ctrl-C means remote interrupt */ + breakpoint(); + continue; + } + + if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { /* buffer overflow, clear it */ + gdb_buf_in_inx = 0; + atomic_set(&gdb_buf_in_cnt, 0); + gdb_buf_out_inx = 0; + break; + } + + gdb_buf[gdb_buf_in_inx++] = chr; + gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); + atomic_inc(&gdb_buf_in_cnt); + } + while (iir & UART_IIR_RDI); + return IRQ_HANDLED; +} /* gdb_interrupt */ + +/* + * Just a NULL routine for testing. + */ +void +gdb_null(void) +{ +} /* gdb_null */ + +extern int serial8250_init(void); + +int +gdb_hook(void) +{ + int retval; + +#ifdef CONFIG_SMP + if (NR_CPUS > KGDB_MAX_NO_CPUS) { + printk + ("kgdb: too manu cpus. Cannot enable debugger with more than 8 cpus\n"); + return (-1); + } +#endif + + /* + * Call first time just to get the ser ptr + */ + + serial8250_init(); + + if (gdb_serial_setup(gdb_ttyS, gdb_baud, &gdb_port, &gdb_irq)) { + printk("gdb_serial_setup() error"); + return (-1); + } + + retval = request_irq(gdb_irq, + gdb_interrupt, SA_INTERRUPT, "GDB-stub", NULL); + if (retval == 0) + initialized = 1; + else { + initialized = 0; + printk("gdb_hook: request_irq(irq=%d) failed: %d\n", gdb_irq, + retval); + } + + /* + * Call GDB routine to setup the exception vectors for the debugger + */ + set_debug_traps(); + + /* + * Call the breakpoint() routine in GDB to start the debugging + * session. + */ + printk("Waiting for connection from remote gdb... "); + breakpoint(); + gdb_null(); + + printk("Connected.\n"); + + gdb_initialized = 1; + return (0); + +} /* gdb_hook_interrupt2 */ + +/* + * getDebugChar + * + * This is a GDB stub routine. It waits for a character from the + * serial interface and then returns it. If there is no serial + * interface connection then it returns a bogus value which will + * almost certainly cause the system to hang. + */ +int +getDebugChar(void) +{ + volatile int chr; + +#ifdef PRNT + printk("getDebugChar: "); +#endif + + while ((chr = read_char()) < 0) + touch_nmi_watchdog(); + +#ifdef PRNT + printk("%c\n", chr > ' ' && chr < 0x7F ? chr : ' '); +#endif + return (chr); + +} /* getDebugChar */ + +/* + * putDebugChar + * + * This is a GDB stub routine. It waits until the interface is ready + * to transmit a char and then sends it. If there is no serial + * interface connection then it simply returns to its caller, having + * pretended to send the char. 
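+ *
+ * (Note: write_char() below busy-waits on UART_LSR_THRE before
+ * writing the byte to UART_TX, so this routine can spin for as long
+ * as the UART never reports its transmitter empty.)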
+ */ +void +putDebugChar(int chr) +{ +#ifdef PRNT + printk("putDebugChar: chr=%02x '%c'\n", chr, + chr > ' ' && chr < 0x7F ? chr : ' '); +#endif + + write_char(chr); /* this routine will wait */ + +} /* putDebugChar */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/drivers/char/sysrq.c 901-mjb1.1/drivers/char/sysrq.c --- 000-virgin/drivers/char/sysrq.c Fri May 30 19:02:05 2003 +++ 901-mjb1.1/drivers/char/sysrq.c Wed Aug 13 20:29:29 2003 @@ -134,6 +134,18 @@ static struct sysrq_key_op sysrq_mountro /* END SYNC SYSRQ HANDLERS BLOCK */ +#ifdef CONFIG_X86_REMOTE_DEBUG +static void sysrq_handle_gdb(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) { + int gdb_hook(void); + gdb_hook(); +} +static struct sysrq_key_op sysrq_gdb_op = { + handler: sysrq_handle_gdb, + help_msg: "Gdb", + action_msg: "Entering debugger", +}; +#endif /* SHOW SYSRQ HANDLERS BLOCK */ @@ -240,7 +252,11 @@ static struct sysrq_key_op *sysrq_key_ta /* d */ NULL, /* e */ &sysrq_term_op, /* f */ NULL, +#ifdef CONFIG_X86_REMOTE_DEBUG +/* g */ &sysrq_gdb_op, +#else /* CONFIG_X86_REMOTE_DEBUG */ /* g */ NULL, +#endif /* CONFIG_X86_REMOTE_DEBUG */ /* h */ NULL, /* i */ &sysrq_kill_op, /* j */ NULL, diff -urpN -X /home/fletch/.diff.exclude 000-virgin/drivers/char/tty_io.c 901-mjb1.1/drivers/char/tty_io.c --- 000-virgin/drivers/char/tty_io.c Wed Aug 13 20:24:22 2003 +++ 901-mjb1.1/drivers/char/tty_io.c Wed Aug 13 20:29:29 2003 @@ -91,6 +91,9 @@ #include #include #include +#ifdef CONFIG_GDB_CONSOLE +#include +#endif #include #include @@ -2190,6 +2193,13 @@ void tty_register_device(struct tty_driv devfs_mk_cdev(dev, S_IFCHR | S_IRUSR | S_IWUSR, "%s%d", driver->devfs_name, index + driver->name_base); + { + extern int kgdb_not_ready_yet; + + if (kgdb_not_ready_yet) + return; + } + /* we don't care about the ptys */ /* how nice to hide this behind some crappy interface.. */ if (driver->type != TTY_DRIVER_TYPE_PTY) { @@ -2415,6 +2425,9 @@ void __init console_init(void) (*call)(); call++; } +#ifdef CONFIG_GDB_CONSOLE + gdb_console_init(); +#endif } #ifdef CONFIG_VT diff -urpN -X /home/fletch/.diff.exclude 000-virgin/drivers/gcov/Makefile 901-mjb1.1/drivers/gcov/Makefile --- 000-virgin/drivers/gcov/Makefile Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/drivers/gcov/Makefile Wed Aug 13 20:51:56 2003 @@ -0,0 +1,8 @@ +# +# Makefile for GCOV profiling kernel module +# + +obj-$(CONFIG_GCOV_PROC) += gcov-proc.o + +$(obj)/gcov-proc.o: $(obj)/gcov-proc.c + diff -urpN -X /home/fletch/.diff.exclude 000-virgin/drivers/gcov/gcov-proc.c 901-mjb1.1/drivers/gcov/gcov-proc.c --- 000-virgin/drivers/gcov/gcov-proc.c Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/drivers/gcov/gcov-proc.c Wed Aug 13 20:51:56 2003 @@ -0,0 +1,713 @@ +/* + * This kernel module provides access to coverage data produced by + * an instrumented kernel via an entry in the proc file system + * at /proc/gcov/. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (c) International Business Machines Corp., 2002 + * + * Author: Hubertus Franke + * Rajan Ravindran + * + * Bugfixes by Peter.Oberparleiter@de.ibm.com: + * Changes by Paul Larson + * Automatically detect gcc version for gcov_type + * + */ + +#include +#include +#include + +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +#define GCOV_PROF_PROC "gcov" + +static DECLARE_MUTEX_LOCKED(gcov_lock); +#define DOWN() down(&gcov_lock); +#define UP() up(&gcov_lock); +#define PAD8(x) ((x + 7) & ~7) + +//#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,4)) +//static inline struct proc_dir_entry *PDE(const struct inode *inode) +//{ +// return ((struct proc_dir_entry *) inode->u.generic_ip); +//} +//#endif + +/* ################################################################### + # NOTICE ########################################################## + ################################################################### + + GCOV_TYPE defines the count type used by the instrumentation code. + Kernels compiled with a gcc version prior to 3.1 should use LONG, + otherwise LONG LONG. */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 1 +typedef long long gcov_type; +#else +typedef long gcov_type; +#endif + + +struct bb +{ + long zero_word; + const char *filename; + gcov_type *counts; + long ncounts; + struct bb *next; + const unsigned long *addresses; + + /* Older GCC's did not emit these fields. */ + long nwords; + const char **functions; + const long *line_nums; + const char **filenames; + char *flags; +}; + +extern struct bb *bb_head; +static struct file_operations proc_gcov_operations; +extern char *gcov_kernelpath; +extern void (*gcov_callback)(int cmd, struct bb *); +extern void do_global_ctors(char *, char *, struct module *, int); + +static int create_bb_links = 1; +static int kernel_path_len; + +int debug = 0; +#define PPRINTK(x) do { if (debug) { printk x ; } } while (0) + +struct gcov_ftree_node +{ + int isdir; /* directory or file */ + char *fname; /* only the name within the hierachy */ + struct gcov_ftree_node *sibling; /* sibling of tree */ + struct gcov_ftree_node *files; /* children of tree */ + struct gcov_ftree_node *parent; /* parent of current gcov_ftree_node */ + struct proc_dir_entry *proc[4]; + struct bb *bb; + /* below only valid for leaf nodes == files */ + unsigned long offset; /* offset in global file */ + struct gcov_ftree_node *next; /* next leave node */ +}; + +static struct proc_dir_entry *proc_vmlinux = NULL; +static struct gcov_ftree_node *leave_nodes = NULL; +static struct gcov_ftree_node *dumpall_cached_node = NULL; +static struct gcov_ftree_node tree_root = + { 1, GCOV_PROF_PROC, NULL, NULL, NULL, + { NULL, NULL, NULL, NULL} , NULL, 0,NULL }; +static char *endings[3] = { ".bb", ".bbg", ".c" }; + + +/* Calculate the header size of an entry in the vmlinux-tracefile which + contains the collection of trace data of all instrumented kernel objects. + + An entry header is defined as: + 0: length of filename of the respective .da file padded to 8 bytes + 8: filename padded to 8 bytes + + */ + +static inline unsigned long +hdr_ofs (struct gcov_ftree_node *tptr) +{ + return 8 + PAD8(strlen (tptr->bb->filename) + 1); +} + + +/* Calculate the total size of an entry in the vmlinux-tracefile. 
+ An entry consists of the header, an 8 byte word for the number + of counts in this entry and the actual array of 8 byte counts. */ + +static inline unsigned long +dump_size(struct gcov_ftree_node *tptr) +{ + return (hdr_ofs(tptr) + (tptr->bb->ncounts+1)*8); +} + + +/* Store a portable representation of VALUE in DEST using BYTES*8-1 bits. + Return a non-zero value if VALUE requires more than BYTES*8-1 bits + to store (this is adapted code from gcc/gcov-io.h). */ + +static int +store_gcov_type (gcov_type value, void *buf, int offset, int len) +{ + const size_t bytes = 8; + char dest[10]; + int upper_bit = (value < 0 ? 128 : 0); + size_t i; + + if (value < 0) { + gcov_type oldvalue = value; + value = -value; + if (oldvalue != -value) + return 1; + } + + for(i = 0 ; + i < (sizeof (value) < bytes ? sizeof (value) : bytes) ; + i++) { + dest[i] = value & (i == (bytes - 1) ? 127 : 255); + value = value / 256; + } + + if (value && value != -1) + return 1; + + for(; i < bytes ; i++) + dest[i] = 0; + dest[bytes - 1] |= upper_bit; + copy_to_user(buf,&dest[offset],len); + return 0; +} + + +/* Create a directory entry in the proc file system and fill in + the respective fields in the provided tree node. Return a + non-zero value on error. */ + +int +create_dir_proc (struct gcov_ftree_node *bt, char *fname) +{ + bt->proc[0] = proc_mkdir(fname, bt->parent->proc[0]); + bt->proc[1] = bt->proc[2] = bt->proc[3] = NULL; + return (bt->proc[0] == NULL); +} + + +/* Replace file ending in with . Return a new + string containing the new filename or NULL on error. */ + +static +char* replace_ending (const char *fname,char *end, char *newend) +{ + char *newfname; + char *cptr = strstr(fname,end); + int len; + if (cptr == NULL) + return NULL; + len = cptr - fname; + newfname = (char*)kmalloc(len+strlen(newend)+1,GFP_KERNEL); + if (newfname == NULL) + return NULL; + memcpy(newfname,fname,len); + strcpy(newfname+len,newend); + return newfname; +} + + +/* Create a file entry in the proc file system and update the respective + fields on the tree node. Optionally try to create links to the + source, .bb and .bbg files. Return a non-zero value on error. */ + +int +create_file_proc (struct gcov_ftree_node *bt, struct bb *bptr, char *fname, + const char *fullname) +{ + bt->proc[0] = create_proc_entry(fname, S_IWUSR | S_IRUGO, + bt->parent->proc[0]); + if (!bt->proc[0]) { + PPRINTK(("error creating file proc <%s>\n", fname)); + return 1; + } + + bt->proc[0]->proc_fops = &proc_gcov_operations; + bt->proc[0]->size = 8 + (8 * bptr->ncounts); + + if (create_bb_links) { + int i; + for (i=0;i<3;i++) { + char *newfname; + char *newfullname; + newfname = replace_ending(fname,".da",endings[i]); + newfullname = replace_ending(fullname,".da",endings[i]); + if ((newfname) && (newfullname)) { + bt->proc[i+1] = proc_symlink(newfname,bt->parent->proc[0],newfullname); + } + if (newfname) kfree(newfname); + if (newfullname) kfree(newfullname); + } + } else { + bt->proc[1] = bt->proc[2] = bt->proc[3] = NULL; + } + return 0; +} + + +/* Recursively check and if necessary create the file specified by + and all its path components, both in the proc file-system as + well as in the internal tree structure. 
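+
+   The name is split at the first '/': that component is looked up or
+   created as a directory node at the current level, and the function
+   then recurses on the remainder until the final component is created
+   as a regular proc file entry.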
*/ + +void +check_proc_fs(const char *fullname, struct gcov_ftree_node *parent, + char *name, struct bb *bbptr) +{ + char dirname[128]; + char *localname = name; + char *tname; + int isdir; + struct gcov_ftree_node *tptr; + + tname = strstr(name, "/"); + if ((isdir = (tname != NULL))) { + memcpy(dirname,name,tname-name); + dirname[tname-name] = '\0'; + localname = dirname; + } + + /* search the list of files in gcov_ftree_node and + * see whether file already exists in this directory level */ + for ( tptr = parent->files ; tptr ; tptr = tptr->sibling) { + if (!strcmp(tptr->fname,localname)) + break; + } + if (!tptr) { + /* no entry yet */ + tptr = (struct gcov_ftree_node*) + kmalloc(sizeof(struct gcov_ftree_node),GFP_KERNEL); + tptr->parent = parent; + + if (!isdir) { + if (create_file_proc(tptr, bbptr, localname,fullname)) { + kfree(tptr); + return; + } + tptr->bb = bbptr; + tptr->proc[0]->data = tptr; + tptr->next = leave_nodes; + leave_nodes = tptr; + } else { + int len = strlen(dirname)+1; + localname = (char*)kmalloc(len,GFP_KERNEL); + strncpy(localname,dirname,len); + if (create_dir_proc(tptr,localname)) { + kfree(tptr); + kfree(localname); + return; + } + tptr->bb = NULL; + tptr->proc[0]->data = NULL; + tptr->next = NULL; + } + tptr->isdir = isdir; + tptr->fname = localname; + tptr->files = NULL; + tptr->sibling = parent->files; + parent->files = tptr; + } + if (isdir) + check_proc_fs(fullname,tptr,tname+1,bbptr); +} + + +/* Read out tracefile data to user space. Return the number of bytes + read. */ + +static ssize_t +read_gcov(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + ssize_t read; + gcov_type ncnt; + struct bb *bbptr; + gcov_type slen; + gcov_type *wptr; + struct gcov_ftree_node *treeptr; + struct proc_dir_entry * de; + int dumpall; + unsigned int hdrofs; + unsigned long poffs; + + DOWN(); + + read = 0; + hdrofs = 0; + poffs = 0; + de = PDE(file->f_dentry->d_inode); + + /* Check whether this is a request to /proc/gcov/vmlinux in + which case we should dump the complete tracefile. */ + dumpall = (de == proc_vmlinux); + + + /* Have treeptr point to the tree node to be dumped. */ + + if (!dumpall) + treeptr = (struct gcov_ftree_node*) (de ? de->data : NULL); + else { + /* dumpall_cached_node will speed up things in case + of a sequential read. */ + if (dumpall_cached_node && (p >= dumpall_cached_node->offset)) { + treeptr = dumpall_cached_node; + } + else + treeptr = leave_nodes; + + /* Search the tree node that covers the requested + tracefile offset. */ + while (treeptr) { + struct gcov_ftree_node *next = treeptr->next; + if ((next == NULL) || (p < next->offset)) { + hdrofs = hdr_ofs(treeptr); + poffs = treeptr->offset; + break; + } + treeptr = next; + } + dumpall_cached_node = treeptr; + } + + bbptr = treeptr ? treeptr->bb : NULL; + + if (bbptr == NULL) + goto out; + + ncnt = (gcov_type) bbptr->ncounts; + p -= poffs; + + do { + if (p < hdrofs) { + /* User wants to read parts of the header. */ + + slen = PAD8(strlen(treeptr->bb->filename)+1); + + if (p >= 8) { + /* Read filename */ + if (slen > (gcov_type) count) slen = count; + copy_to_user (buf, &treeptr->bb->filename[p-8], + slen); + count-=slen;buf+= slen;read+=slen;p+=slen; + continue; + } + wptr = &slen; + } + else if (p < (hdrofs + 8)) { + /* User wants to read the number of counts in this + entry. 
*/ + + wptr = &ncnt; + } + else if (p < (hdrofs) + (unsigned long) (ncnt+1)*8) { + /* User wants to read actual counters */ + + wptr = &bbptr->counts[((p-hdrofs)/8)-1]; + } + else + break; + + /* do we have to write partial word */ + + if ((count < 8) || (p & 0x7)) { + /* partial write */ + unsigned long offset = p & 0x7; + unsigned long length = (count+offset)<8?count:(8-offset); + + store_gcov_type(*wptr,buf, offset, length); + buf+=length;p+=length;count-=length;read+=length; + break; + } else { + store_gcov_type(*wptr,buf, 0, 8); + buf+=8;p+=8;count-=8;read+=8; + } + } while (count > 0); + *ppos = p + poffs; +out: + UP(); + return read; +} + + +/* A write to any of our proc file-system entries is interpreted + as a request to reset the data from that node. */ + +static ssize_t +write_gcov(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + struct bb *ptr; + struct proc_dir_entry * de; + int resetall, i; + struct gcov_ftree_node *tptr; + + DOWN(); + + de = PDE(file->f_dentry->d_inode); + + if (de == NULL) { + count = 0; + goto out; + } + + /* Check for a write to /proc/gcov/vmlinux */ + resetall = (de == proc_vmlinux); + + if (resetall) { + /* Reset all nodes */ + for (ptr = bb_head; ptr != (struct bb *) 0; ptr = ptr->next) + { + int i; + if (ptr->counts == NULL) continue; + for (i = 0; i < ptr->ncounts; i++) + ptr->counts[i]=0; + } + } else { + /* Reset a single node */ + tptr = (struct gcov_ftree_node*)(de->data); + if (tptr == NULL) + goto out; + ptr = tptr->bb; + if (ptr->ncounts != 0) { + for (i = 0; i < ptr->ncounts; i++) + ptr->counts[i]=0; + } + } +out: + UP(); + return count; +} + + +/* This struct identifies the functions to be used for proc file-system + interaction. */ + +static struct file_operations proc_gcov_operations = { + read: read_gcov, + write: write_gcov +}; + + +/* Recursively remove a node and all its children from the internal + data tree and from the proc file-system. */ + +void +cleanup_node(struct gcov_ftree_node *node, int delname, int del_in_parent) +{ + struct gcov_ftree_node *next,*tptr; + struct proc_dir_entry *par_proc; + + PPRINTK(("parent n:%p p:%p f:%p s:%p <%s>\n", node, + node->parent, node->files, node->sibling, node->fname)); + if ((tptr = node->parent)) { + if (del_in_parent) { + /* Remove node from parent's list of children */ + struct gcov_ftree_node *cptr,*prev_cptr; + for ( prev_cptr = NULL, cptr = tptr->files; cptr && (cptr != node); + prev_cptr = cptr, cptr = cptr->sibling); + if (prev_cptr == NULL) + tptr->files = cptr->sibling; + else + prev_cptr->sibling = cptr->sibling; + } + par_proc = (struct proc_dir_entry*)(tptr->proc[0]); + } else + par_proc = &proc_root; + + if (node->isdir) { + /* In case of a directory, clean up all child nodes. */ + next = node->files; + node->files = NULL; + for (tptr = next ; tptr; ) { + next = tptr->sibling; + cleanup_node(tptr,1,0); + tptr = next; + } + remove_proc_entry(node->fname, par_proc); + if (delname) kfree(node->fname); + } else { + /* Remove file entry and optional links. 
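+		   The optional links are the .bb, .bbg and .c symlinks that
+		   create_file_proc() set up next to the .da entry when
+		   create_bb_links is enabled.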
*/ + remove_proc_entry(node->fname, par_proc); + if (create_bb_links) { + int i; + for (i=0;i<3;i++) { + char *newfname; + if (node->proc[i+1] == NULL) continue; + newfname = replace_ending(node->fname,".da",endings[i]); + if (newfname) { + PPRINTK(("remove_proc_entry <%s>\n", node->fname)); + remove_proc_entry(newfname, par_proc); + kfree(newfname); + } + } + } + } + /* free the data */ + if (node != &tree_root) + kfree(node); +} + + +/* Create a tree node for the given bb struct and initiate the + creation of a corresponding proc file-system entry. */ + +static void +create_node_tree(struct bb *bbptr) +{ + const char *tmp; + const char *filename = bbptr->filename; + char *modname; + int len; + + PPRINTK(("kernelpath <%s> <%s>\n", gcov_kernelpath, filename)); + + /* Check whether this is a file located in the kernel source + directory. */ + if (!strncmp (filename, gcov_kernelpath, kernel_path_len)) + { + /* Remove kernel path and create relative proc-file-system + entry. */ + tmp = filename + kernel_path_len+1; + if (*tmp == '0') return; + check_proc_fs(filename, &tree_root, (char*)tmp, bbptr); + } + else { + /* Insert entry to module sub-directory. */ + len = strlen(filename); + modname = (char *)kmalloc (len + 7, GFP_KERNEL); + strcpy(modname, "module"); + strcat (modname, filename); + check_proc_fs(filename, &tree_root, modname, bbptr); + } +} + + +/* This function will be used as gcov_callback, i.e. it is + called from constructor and destructor code of all instrumented + object files. It updates the local tree structure and the proc + file-system entries. */ + +static void +gcov_cleanup(int cmd, struct bb *bbptr) +{ + unsigned long offset = 0; + struct gcov_ftree_node *tptr; + struct gcov_ftree_node *parent; + struct gcov_ftree_node *prev_cptr; + + DOWN(); + switch (cmd) { + case 0: + /* remove leave node */ + prev_cptr = NULL; + for (tptr = leave_nodes; tptr ; prev_cptr = tptr, tptr = tptr->next) { + if (tptr->bb == bbptr) break; + } + if (!tptr) { + PPRINTK(("Can't find module in /proc/gcov\n")); + UP(); + return; + } + if (prev_cptr) + prev_cptr->next = tptr->next; + else + leave_nodes = tptr->next; + dumpall_cached_node = NULL; + + + /* Find highest level node without further siblings */ + + parent = tptr->parent; + do { + if (parent->files->sibling != NULL) break; + tptr = parent; + parent = parent->parent; + } while (parent); + cleanup_node(tptr,0,1); + + /* Update the offsets at which a certain node can + be found in the tracefile. */ + for (tptr = leave_nodes; tptr; tptr = tptr->next) { + tptr->offset = offset; + offset += dump_size(tptr); + } + break; + + case 1: + /* insert node */ + create_node_tree(bbptr); + + /* Update the offsets at which a certain node can + be found in the tracefile. */ + for (tptr = leave_nodes; tptr; tptr = tptr->next) { + tptr->offset = offset; + offset += dump_size(tptr); + } + + break; + } + UP(); +} + + +/* Initialize the data structure by calling the constructor code + of all instrumented object files and creating the proc + file-system entries. */ + +int +init_module(void) +{ + struct bb *bbptr; + unsigned long offset = 0; + struct gcov_ftree_node *tptr; + + PPRINTK(("init module <%s>\n\n", GCOV_PROF_PROC)); + + do_global_ctors(NULL, NULL, NULL, 0); + + tree_root.proc[0] = proc_mkdir(GCOV_PROF_PROC, 0); + kernel_path_len = strlen(gcov_kernelpath); + + for (bbptr = bb_head; bbptr ; bbptr = bbptr->next) { + create_node_tree(bbptr); + } + + /* Fill in the offset at which a certain node can + be found in the tracefile. 
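+	   Each leaf node's offset is the running sum of dump_size() over
+	   the preceding leaves, matching the layout read_gcov() assumes
+	   when dumping everything through /proc/gcov/vmlinux.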
*/ + for (tptr = leave_nodes; tptr; tptr = tptr->next) { + tptr->offset = offset; + offset += dump_size(tptr); + } + + proc_vmlinux = create_proc_entry("vmlinux",S_IWUSR | S_IRUGO, + tree_root.proc[0]); + if (proc_vmlinux) + proc_vmlinux->proc_fops = &proc_gcov_operations; + + gcov_callback = gcov_cleanup; + UP(); + return 0; +} + + +void +cleanup_module(void) +{ + PPRINTK(("remove module <%s>\n\n", GCOV_PROF_PROC)); + gcov_callback = NULL; + DOWN(); + cleanup_node(&tree_root,0,0); +} + +//module_init(gcov_init_module); +//module_exit(gcov_cleanup_module); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/drivers/net/loopback.c 901-mjb1.1/drivers/net/loopback.c --- 000-virgin/drivers/net/loopback.c Sun Nov 17 20:29:25 2002 +++ 901-mjb1.1/drivers/net/loopback.c Wed Aug 13 20:46:15 2003 @@ -194,7 +194,7 @@ int __init loopback_init(struct net_devi /* Current netfilter will die with oom linearizing large skbs, * however this will be cured before 2.5.x is done. */ - dev->features |= NETIF_F_TSO; +/* dev->features |= NETIF_F_TSO; */ dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); if (dev->priv == NULL) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/drivers/pci/probe.c 901-mjb1.1/drivers/pci/probe.c --- 000-virgin/drivers/pci/probe.c Wed Aug 13 20:24:26 2003 +++ 901-mjb1.1/drivers/pci/probe.c Wed Aug 13 20:47:25 2003 @@ -176,7 +176,7 @@ void __devinit pci_read_bridge_bases(str limit |= (io_limit_hi << 16); } - if (base && base <= limit) { + if (base <= limit) { res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO; res->start = base; res->end = limit + 0xfff; diff -urpN -X /home/fletch/.diff.exclude 000-virgin/drivers/scsi/sd.c 901-mjb1.1/drivers/scsi/sd.c --- 000-virgin/drivers/scsi/sd.c Tue Aug 5 20:01:53 2003 +++ 901-mjb1.1/drivers/scsi/sd.c Wed Aug 13 20:48:47 2003 @@ -61,7 +61,9 @@ * Remaining dev_t-handling stuff */ #define SD_MAJORS 16 -#define SD_DISKS (SD_MAJORS << 4) +#define SD_DISKS ((SD_MAJORS - 1) << 4) +#define LAST_MAJOR_DISKS (1 << (KDEV_MINOR_BITS - 4)) +#define TOTAL_SD_DISKS (SD_DISKS + LAST_MAJOR_DISKS) /* * Time out in seconds for disks and Magneto-opticals (which are slower). @@ -87,7 +89,7 @@ struct scsi_disk { unsigned RCD : 1; /* state of disk RCD bit, unused */ }; -static unsigned long sd_index_bits[SD_DISKS / BITS_PER_LONG]; +static unsigned long sd_index_bits[TOTAL_SD_DISKS / BITS_PER_LONG]; static spinlock_t sd_index_lock = SPIN_LOCK_UNLOCKED; static int sd_revalidate_disk(struct gendisk *disk); @@ -122,6 +124,9 @@ static int sd_major(int major_idx) return SCSI_DISK1_MAJOR + major_idx - 1; case 8 ... 15: return SCSI_DISK8_MAJOR + major_idx - 8; +#define MAX_IDX (TOTAL_SD_DISKS >> 4) + case 16 ... 
MAX_IDX: + return SCSI_DISK15_MAJOR; default: BUG(); return 0; /* shut up gcc */ @@ -1258,8 +1263,8 @@ static int sd_probe(struct device *dev) goto out_free; spin_lock(&sd_index_lock); - index = find_first_zero_bit(sd_index_bits, SD_DISKS); - if (index == SD_DISKS) { + index = find_first_zero_bit(sd_index_bits, TOTAL_SD_DISKS); + if (index == TOTAL_SD_DISKS) { spin_unlock(&sd_index_lock); error = -EBUSY; goto out_put; @@ -1274,15 +1279,25 @@ static int sd_probe(struct device *dev) sdkp->openers = 0; gd->major = sd_major(index >> 4); - gd->first_minor = (index & 15) << 4; +#define DISKS_PER_MINOR_MASK ((1 << (KDEV_MINOR_BITS - 4)) - 1) + if (index > SD_DISKS) + gd->first_minor = ((index - SD_DISKS) & DISKS_PER_MINOR_MASK) << 4; + else + gd->first_minor = (index & 15) << 4; gd->minors = 16; gd->fops = &sd_fops; - if (index >= 26) { + if (index < 26) { + sprintf(gd->disk_name, "sd%c", 'a' + index % 26); + } else if (index < (26*27)) { sprintf(gd->disk_name, "sd%c%c", 'a' + index/26-1,'a' + index % 26); } else { - sprintf(gd->disk_name, "sd%c", 'a' + index % 26); + const unsigned int m1 = (index/ 26 - 1) / 26 - 1; + const unsigned int m2 = (index / 26 - 1) % 26; + const unsigned int m3 = index % 26; + sprintf(gd->disk_name, "sd%c%c%c", + 'a' + m1, 'a' + m2, 'a' + m3); } strcpy(gd->devfs_name, sdp->devfs_name); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/drivers/serial/8250.c 901-mjb1.1/drivers/serial/8250.c --- 000-virgin/drivers/serial/8250.c Wed Jul 2 21:59:11 2003 +++ 901-mjb1.1/drivers/serial/8250.c Wed Aug 13 20:29:29 2003 @@ -2117,9 +2117,116 @@ void serial8250_resume_port(int line, u3 uart_resume_port(&serial8250_reg, &serial8250_ports[line].port, level); } -static int __init serial8250_init(void) +#ifdef CONFIG_X86_REMOTE_DEBUG +/* + * Takes: + * ttyS - integer specifying which serial port to use for debugging + * baud - baud rate of specified serial port + * Returns: + * port for use by the gdb serial driver + */ +int gdb_serial_setup(int ttyS, int baud, int *port, int *irq) +{ + struct uart_8250_port *up; + unsigned cval; + int bits = 8; + int parity = 'n'; + int cflag = CREAD | HUPCL | CLOCAL; + int quot = 0; + + /* + * Now construct a cflag setting. + */ + switch(baud) { + case 1200: + cflag |= B1200; + break; + case 2400: + cflag |= B2400; + break; + case 4800: + cflag |= B4800; + break; + case 19200: + cflag |= B19200; + break; + case 38400: + cflag |= B38400; + break; + case 57600: + cflag |= B57600; + break; + case 115200: + cflag |= B115200; + break; + case 9600: + default: + cflag |= B9600; + break; + } + switch(bits) { + case 7: + cflag |= CS7; + break; + default: + case 8: + cflag |= CS8; + break; + } + switch(parity) { + case 'o': case 'O': + cflag |= PARODD; + break; + case 'e': case 'E': + cflag |= PARENB; + break; + } + + /* + * Divisor, bytesize and parity + */ + + up = &serial8250_ports[ttyS]; +// ser->flags &= ~ASYNC_BOOT_AUTOCONF; + quot = ( 1843200 / 16 ) / baud; + cval = cflag & (CSIZE | CSTOPB); + cval >>= 4; + if (cflag & PARENB) + cval |= UART_LCR_PARITY; + if (!(cflag & PARODD)) + cval |= UART_LCR_EPAR; + + /* + * Disable UART interrupts, set DTR and RTS high + * and set speed. 
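+	 *
+	 * (Note: cval is then overwritten with 0x3, i.e. 8N1, so the
+	 * parity and size bits computed above are effectively discarded,
+	 * and the IER write below actually enables the receive-data
+	 * interrupt (UART_IER_RDI) for the gdb stub.)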
+ */ + cval = 0x3; + serial_outp(up, UART_LCR, cval | UART_LCR_DLAB); /* set DLAB */ + serial_outp(up, UART_DLL, quot & 0xff); /* LS of divisor */ + serial_outp(up, UART_DLM, quot >> 8); /* MS of divisor */ + serial_outp(up, UART_LCR, cval); /* reset DLAB */ + serial_outp(up, UART_IER, UART_IER_RDI); /* turn on interrupts*/ + serial_outp(up, UART_MCR, UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS); + + /* + * If we read 0xff from the LSR, there is no UART here. + */ + if (serial_inp(up, UART_LSR) == 0xff) + return 1; + *port = up->port.iobase; + *irq = up->port.irq; +// serial8250_shutdown(&up->port); + return 0; +} +#endif + +int serial8250_init(void) { int ret, i; + static int didit = 0; + + if (didit++) + return 0; printk(KERN_INFO "Serial: 8250/16550 driver $Revision: 1.90 $ " "IRQ sharing %sabled\n", share_irqs ? "en" : "dis"); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/drivers/serial/core.c 901-mjb1.1/drivers/serial/core.c --- 000-virgin/drivers/serial/core.c Tue Aug 5 20:01:53 2003 +++ 901-mjb1.1/drivers/serial/core.c Wed Aug 13 20:29:29 2003 @@ -33,6 +33,10 @@ #include #include /* for serial_state and serial_icounter_struct */ +#ifdef CONFIG_X86_REMOTE_DEBUG +#include +#endif + #include #include @@ -1130,6 +1134,16 @@ uart_ioctl(struct tty_struct *tty, struc * protected against the tty being hung up. */ switch (cmd) { +#ifdef CONFIG_X86_REMOTE_DEBUG + case TIOCGDB: + ret = -ENOTTY; + if (capable(CAP_SYS_ADMIN)) { + gdb_ttyS = MINOR(tty->device) & 0x03F; + gdb_baud = tty_get_baud_rate(tty); + ret = gdb_hook(); + } + break; +#endif case TIOCSERGETLSR: /* Get line status register */ ret = uart_get_lsr_info(state, (unsigned int *)arg); break; @@ -1146,6 +1160,30 @@ uart_ioctl(struct tty_struct *tty, struc out: return ret; } + + /* + * ------------------------------------------------------------ + * Serial GDB driver (most in gdbserial.c) + * ------------------------------------------------------------ + */ + +#ifdef CONFIG_X86_REMOTE_DEBUG +#ifdef CONFIG_GDB_CONSOLE +static struct console gdbcons = { + name: "gdb", + write: gdb_console_write, + flags: CON_PRINTBUFFER | CON_ENABLED, + index: -1, +}; +#endif + +#ifdef CONFIG_GDB_CONSOLE +void __init gdb_console_init(void) +{ + register_console(&gdbcons); +} +#endif +#endif /* CONFIG_X86_REMOTE_DEBUG */ static void uart_set_termios(struct tty_struct *tty, struct termios *old_termios) { diff -urpN -X /home/fletch/.diff.exclude 000-virgin/fs/aio.c 901-mjb1.1/fs/aio.c --- 000-virgin/fs/aio.c Tue Aug 5 20:01:42 2003 +++ 901-mjb1.1/fs/aio.c Wed Aug 13 20:48:56 2003 @@ -204,6 +204,7 @@ static struct kioctx *ioctx_alloc(unsign { struct mm_struct *mm; struct kioctx *ctx; + int ret = 0; /* Prevent overflows */ if ((nr_events > (0x10000000U / sizeof(struct io_event))) || @@ -233,7 +234,8 @@ static struct kioctx *ioctx_alloc(unsign INIT_LIST_HEAD(&ctx->run_list); INIT_WORK(&ctx->wq, aio_kick_handler, ctx); - if (aio_setup_ring(ctx) < 0) + ret = aio_setup_ring(ctx); + if (unlikely(ret < 0)) goto out_freectx; /* limit the number of system wide aios */ @@ -259,7 +261,7 @@ out_cleanup: out_freectx: kmem_cache_free(kioctx_cachep, ctx); - ctx = ERR_PTR(-ENOMEM); + ctx = ERR_PTR(ret); dprintk("aio: error allocating ioctx %p\n", ctx); return ctx; diff -urpN -X /home/fletch/.diff.exclude 000-virgin/fs/binfmt_aout.c 901-mjb1.1/fs/binfmt_aout.c --- 000-virgin/fs/binfmt_aout.c Tue Aug 5 20:01:42 2003 +++ 901-mjb1.1/fs/binfmt_aout.c Wed Aug 13 20:51:52 2003 @@ -310,7 +310,7 @@ static int load_aout_binary(struct linux (current->mm->start_brk = 
N_BSSADDR(ex)); current->mm->free_area_cache = TASK_UNMAPPED_BASE; - current->mm->rss = 0; + zero_rss(current->mm); current->mm->mmap = NULL; compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; diff -urpN -X /home/fletch/.diff.exclude 000-virgin/fs/binfmt_elf.c 901-mjb1.1/fs/binfmt_elf.c --- 000-virgin/fs/binfmt_elf.c Tue Aug 5 20:01:54 2003 +++ 901-mjb1.1/fs/binfmt_elf.c Wed Aug 13 20:51:52 2003 @@ -634,7 +634,7 @@ static int load_elf_binary(struct linux_ /* Do this so that we can load the interpreter, if need be. We will change some of these later */ - current->mm->rss = 0; + zero_rss(current->mm); current->mm->free_area_cache = TASK_UNMAPPED_BASE; retval = setup_arg_pages(bprm); if (retval < 0) { diff -urpN -X /home/fletch/.diff.exclude 000-virgin/fs/binfmt_flat.c 901-mjb1.1/fs/binfmt_flat.c --- 000-virgin/fs/binfmt_flat.c Wed Aug 13 20:24:28 2003 +++ 901-mjb1.1/fs/binfmt_flat.c Wed Aug 13 20:51:52 2003 @@ -643,7 +643,7 @@ static int load_flat_file(struct linux_b current->mm->start_brk = datapos + data_len + bss_len; current->mm->brk = (current->mm->start_brk + 3) & ~3; current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len; - current->mm->rss = 0; + zero_rss(current->mm); } if (flags & FLAT_FLAG_KTRACE) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/fs/binfmt_som.c 901-mjb1.1/fs/binfmt_som.c --- 000-virgin/fs/binfmt_som.c Thu Feb 13 11:08:11 2003 +++ 901-mjb1.1/fs/binfmt_som.c Wed Aug 13 20:51:52 2003 @@ -259,7 +259,7 @@ load_som_binary(struct linux_binprm * bp create_som_tables(bprm); current->mm->start_stack = bprm->p; - current->mm->rss = 0; + zero_rss(current->mm); #if 0 printk("(start_brk) %08lx\n" , (unsigned long) current->mm->start_brk); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/fs/exec.c 901-mjb1.1/fs/exec.c --- 000-virgin/fs/exec.c Wed Aug 13 20:24:28 2003 +++ 901-mjb1.1/fs/exec.c Wed Aug 13 20:51:52 2003 @@ -317,10 +317,11 @@ void put_dirty_page(struct task_struct * } lru_cache_add_active(page); flush_dcache_page(page); + SetPageAnon(page); set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot)))); pte_chain = page_add_rmap(page, pte, pte_chain); pte_unmap(pte); - tsk->mm->rss++; + inc_rss(tsk->mm, page); spin_unlock(&tsk->mm->page_table_lock); /* no need for flush_tlb */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/fs/inode.c 901-mjb1.1/fs/inode.c --- 000-virgin/fs/inode.c Wed Aug 13 20:24:28 2003 +++ 901-mjb1.1/fs/inode.c Wed Aug 13 20:51:50 2003 @@ -145,6 +145,9 @@ static struct inode *alloc_inode(struct mapping->dirtied_when = 0; mapping->assoc_mapping = NULL; mapping->backing_dev_info = &default_backing_dev_info; +#ifdef CONFIG_NUMA + mapping->binding = NULL; +#endif if (sb->s_bdev) mapping->backing_dev_info = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; memset(&inode->u, 0, sizeof(inode->u)); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/fs/proc/array.c 901-mjb1.1/fs/proc/array.c --- 000-virgin/fs/proc/array.c Sat May 10 18:35:00 2003 +++ 901-mjb1.1/fs/proc/array.c Wed Aug 13 20:47:27 2003 @@ -336,7 +336,7 @@ int proc_pid_stat(struct task_struct *ta read_unlock(&tasklist_lock); res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %llu %lu %ld %lu %lu %lu %lu %lu \ -%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", +%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %lu %lu %lu\n", task->pid, task->comm, state, @@ -382,7 +382,10 @@ int proc_pid_stat(struct task_struct *ta task->exit_signal, task_cpu(task), task->rt_priority, - task->policy); + task->policy, + 
jiffies_to_clock_t(task->sched_info.inter_arrival_time), + jiffies_to_clock_t(task->sched_info.service_time), + jiffies_to_clock_t(task->sched_info.response_time)); if(mm) mmput(mm); return res; diff -urpN -X /home/fletch/.diff.exclude 000-virgin/fs/proc/base.c 901-mjb1.1/fs/proc/base.c --- 000-virgin/fs/proc/base.c Tue Aug 5 20:01:42 2003 +++ 901-mjb1.1/fs/proc/base.c Wed Aug 13 20:51:45 2003 @@ -1390,62 +1390,38 @@ out: } #define PROC_NUMBUF 10 -#define PROC_MAXPIDS 20 -/* - * Get a few pid's to return for filldir - we need to hold the - * tasklist lock while doing this, and we must release it before - * we actually do the filldir itself, so we use a temp buffer.. - */ -static int get_pid_list(int index, unsigned int *pids) -{ - struct task_struct *p; - int nr_pids = 0; - - index--; - read_lock(&tasklist_lock); - for_each_process(p) { - int pid = p->pid; - if (!pid_alive(p)) - continue; - if (--index >= 0) - continue; - pids[nr_pids] = pid; - nr_pids++; - if (nr_pids >= PROC_MAXPIDS) - break; - } - read_unlock(&tasklist_lock); - return nr_pids; -} int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { - unsigned int pid_array[PROC_MAXPIDS]; char buf[PROC_NUMBUF]; unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; - unsigned int nr_pids, i; + int pid; if (!nr) { ino_t ino = fake_ino(0,PROC_PID_INO); if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) return 0; filp->f_pos++; - nr++; + nr = 1; } + pid = nr - 1; + for (;;) { + unsigned long i, j; + ino_t ino; - nr_pids = get_pid_list(nr, pid_array); - - for (i = 0; i < nr_pids; i++) { - int pid = pid_array[i]; - ino_t ino = fake_ino(pid,PROC_PID_INO); - unsigned long j = PROC_NUMBUF; + pid = find_next_pid(pid); + if (pid < 0) + break; - do buf[--j] = '0' + (pid % 10); while (pid/=10); + i = pid; + j = PROC_NUMBUF; + do buf[--j] = '0' + (i % 10); while (i/=10); + ino = fake_ino(pid,PROC_PID_INO); if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) break; - filp->f_pos++; + filp->f_pos = pid + 1 + FIRST_PROCESS_ENTRY; } return 0; } diff -urpN -X /home/fletch/.diff.exclude 000-virgin/fs/proc/proc_misc.c 901-mjb1.1/fs/proc/proc_misc.c --- 000-virgin/fs/proc/proc_misc.c Tue Aug 5 20:01:54 2003 +++ 901-mjb1.1/fs/proc/proc_misc.c Wed Aug 13 20:51:40 2003 @@ -134,6 +134,41 @@ static struct vmalloc_info get_vmalloc_i return vmi; } +static int real_loadavg_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int a, b, c, cpu; + int len; + + a = tasks_running[0] + (FIXED_1/200); + b = tasks_running[1] + (FIXED_1/200); + c = tasks_running[2] + (FIXED_1/200); + len = sprintf(page,"Domain load1 load2 load3 nr_run/nr_thrd\n"); + len += sprintf(page+len,"SYSTEM %5d.%02d %5d.%02d %5d.%02d %7ld/%7d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), + nr_running(), nr_threads); + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + unsigned long nr_running; + if (!cpu_online(cpu)) + continue; + preempt_disable(); + a = per_cpu(cpu_tasks_running,cpu)[0] + (FIXED_1/200); + b = per_cpu(cpu_tasks_running,cpu)[1] + (FIXED_1/200); + c = per_cpu(cpu_tasks_running,cpu)[2] + (FIXED_1/200); + nr_running = nr_running_cpu(cpu); + preempt_enable(); + len += sprintf(page+len, "%5d %5d.%02d %5d.%02d %5d.%02d %7ld/%7d\n", + cpu, + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), + nr_running, nr_threads); + } + return proc_calc_metrics(page, start, off, count, eof, len); +} + static int uptime_read_proc(char *page, char 
**start, off_t off, int count, int *eof, void *data) { @@ -342,6 +377,71 @@ static struct file_operations proc_modul }; #endif +#ifdef CONFIG_NUMA +#define K(x) ((x) << (PAGE_SHIFT - 10)) +static int show_meminfo_numa (struct seq_file *m, void *v) +{ + int *d = v; + int nid = *d; + struct sysinfo i; + si_meminfo_node(&i, nid); + seq_printf(m, "\n" + "Node %d MemTotal: %8lu kB\n" + "Node %d MemFree: %8lu kB\n" + "Node %d MemUsed: %8lu kB\n" + "Node %d HighTotal: %8lu kB\n" + "Node %d HighFree: %8lu kB\n" + "Node %d LowTotal: %8lu kB\n" + "Node %d LowFree: %8lu kB\n", + nid, K(i.totalram), + nid, K(i.freeram), + nid, K(i.totalram-i.freeram), + nid, K(i.totalhigh), + nid, K(i.freehigh), + nid, K(i.totalram-i.totalhigh), + nid, K(i.freeram-i.freehigh)); + + return 0; +} +#undef K + +extern struct seq_operations meminfo_numa_op; +static int meminfo_numa_open(struct inode *inode, struct file *file) +{ + return seq_open(file,&meminfo_numa_op); +} + +static struct file_operations proc_meminfo_numa_operations = { + open: meminfo_numa_open, + read: seq_read, + llseek: seq_lseek, + release: seq_release, +}; + +static void *meminfo_numa_start(struct seq_file *m, loff_t *pos) +{ + return *pos < numnodes ? pos : NULL; +} + +static void *meminfo_numa_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return meminfo_numa_start(m, pos); +} + +static void meminfo_numa_stop(struct seq_file *m, void *v) +{ +} + +struct seq_operations meminfo_numa_op = { + .start = meminfo_numa_start, + .next = meminfo_numa_next, + .stop = meminfo_numa_stop, + .show = show_meminfo_numa, +}; + +#endif + extern struct seq_operations slabinfo_op; extern ssize_t slabinfo_write(struct file *, const char *, size_t, loff_t *); static int slabinfo_open(struct inode *inode, struct file *file) @@ -399,14 +499,20 @@ static int kstat_read_proc(char *page, c jiffies_to_clock_t(idle), jiffies_to_clock_t(iowait)); for (i = 0 ; i < NR_CPUS; i++){ - if (!cpu_online(i)) continue; - len += sprintf(page + len, "cpu%d %u %u %u %u %u\n", + struct sched_info info; + if (!cpu_online(i)) + continue; + cpu_sched_info(&info, i); + len += sprintf(page + len, "cpu%d %u %u %u %u %u %u %u %u\n", i, jiffies_to_clock_t(kstat_cpu(i).cpustat.user), jiffies_to_clock_t(kstat_cpu(i).cpustat.nice), jiffies_to_clock_t(kstat_cpu(i).cpustat.system), jiffies_to_clock_t(kstat_cpu(i).cpustat.idle), - jiffies_to_clock_t(kstat_cpu(i).cpustat.iowait)); + jiffies_to_clock_t(kstat_cpu(i).cpustat.iowait), + (uint) jiffies_to_clock_t(info.inter_arrival_time), + (uint) jiffies_to_clock_t(info.service_time), + (uint) jiffies_to_clock_t(info.response_time)); } len += sprintf(page + len, "intr %u", sum); @@ -603,6 +709,36 @@ static void create_seq_entry(char *name, entry->proc_fops = f; } +#ifdef CONFIG_LOCKMETER +extern ssize_t get_lockmeter_info(char *, size_t, loff_t *); +extern ssize_t put_lockmeter_info(const char *, size_t); +extern int get_lockmeter_info_size(void); + +/* + * This function accesses lock metering information. 
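+ * Reads of /proc/lockmeter return the collected statistics via
+ * get_lockmeter_info(); writing anything to the file resets the
+ * counters (see write_lockmeter() below).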
+ */ +static ssize_t read_lockmeter(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + return get_lockmeter_info(buf, count, ppos); +} + +/* + * Writing to /proc/lockmeter resets the counters + */ +static ssize_t write_lockmeter(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + return put_lockmeter_info(buf, count); +} + +static struct file_operations proc_lockmeter_operations = { + NULL, /* lseek */ + read: read_lockmeter, + write: write_lockmeter, +}; +#endif /* CONFIG_LOCKMETER */ + void __init proc_misc_init(void) { struct proc_dir_entry *entry; @@ -611,6 +747,7 @@ void __init proc_misc_init(void) int (*read_proc)(char*,char**,off_t,int,int*,void*); } *p, simple_ones[] = { {"loadavg", loadavg_read_proc}, + {"real_loadavg",real_loadavg_read_proc}, {"uptime", uptime_read_proc}, {"meminfo", meminfo_read_proc}, {"version", version_read_proc}, @@ -650,6 +787,9 @@ void __init proc_misc_init(void) #ifdef CONFIG_MODULES create_seq_entry("modules", 0, &proc_modules_operations); #endif +#ifdef CONFIG_NUMA + create_seq_entry("meminfo.numa",0,&proc_meminfo_numa_operations); +#endif proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); if (proc_root_kcore) { proc_root_kcore->proc_fops = &proc_kcore_operations; @@ -667,6 +807,13 @@ void __init proc_misc_init(void) entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL); if (entry) entry->proc_fops = &proc_sysrq_trigger_operations; +#endif +#ifdef CONFIG_LOCKMETER + entry = create_proc_entry("lockmeter", S_IWUSR | S_IRUGO, NULL); + if (entry) { + entry->proc_fops = &proc_lockmeter_operations; + entry->size = get_lockmeter_info_size(); + } #endif #ifdef CONFIG_PPC32 { diff -urpN -X /home/fletch/.diff.exclude 000-virgin/fs/proc/task_mmu.c 901-mjb1.1/fs/proc/task_mmu.c --- 000-virgin/fs/proc/task_mmu.c Tue Jun 24 21:29:23 2003 +++ 901-mjb1.1/fs/proc/task_mmu.c Wed Aug 13 20:51:52 2003 @@ -3,6 +3,22 @@ #include #include +#ifdef CONFIG_NUMA +char *task_mem_pernode(struct mm_struct *mm, char *buffer) +{ + int nid; + + for (nid = 0; nid < MAX_NUMNODES; nid++){ + buffer += sprintf(buffer, "VmRSS-node_%d:\t%8lu kb\n", + nid, mm->pernode_rss[nid] << (PAGE_SHIFT-10)); + } + + return buffer; +} +#else /* !CONFIG_NUMA */ +#define task_mem_pernode(mm, buffer) (buffer) +#endif /* CONFIG_NUMA */ + char *task_mem(struct mm_struct *mm, char *buffer) { unsigned long data = 0, stack = 0, exec = 0, lib = 0; @@ -39,6 +55,7 @@ char *task_mem(struct mm_struct *mm, cha mm->rss << (PAGE_SHIFT-10), data - stack, stack, exec - lib, lib); + buffer = task_mem_pernode(mm, buffer); up_read(&mm->mmap_sem); return buffer; } diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-alpha/lockmeter.h 901-mjb1.1/include/asm-alpha/lockmeter.h --- 000-virgin/include/asm-alpha/lockmeter.h Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/include/asm-alpha/lockmeter.h Wed Aug 13 20:29:36 2003 @@ -0,0 +1,90 @@ +/* + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.h by Jack Steiner (steiner@sgi.com) + * + * Modified by Peter Rival (frival@zk3.dec.com) + */ + +#ifndef _ALPHA_LOCKMETER_H +#define _ALPHA_LOCKMETER_H + +#include +#define CPU_CYCLE_FREQUENCY hwrpb->cycle_freq + +#define get_cycles64() get_cycles() + +#define THIS_CPU_NUMBER smp_processor_id() + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0) +#define local_irq_save(x) \ + __save_and_cli(x) +#define local_irq_restore(x) \ + __restore_flags(x) +#endif /* Linux version 2.2.x */ + +#define SPINLOCK_MAGIC_INIT /**/ + +/* + * Macros to cache and retrieve an index value 
inside of a lock + * these macros assume that there are less than 65536 simultaneous + * (read mode) holders of a rwlock. + * We also assume that the hash table has less than 32767 entries. + * the high order bit is used for write locking a rw_lock + * Note: although these defines and macros are the same as what is being used + * in include/asm-i386/lockmeter.h, they are present here to easily + * allow an alternate Alpha implementation. + */ +/* + * instrumented spinlock structure -- never used to allocate storage + * only used in macros below to overlay a spinlock_t + */ +typedef struct inst_spinlock_s { + /* remember, Alpha is little endian */ + unsigned short lock; + unsigned short index; +} inst_spinlock_t; +#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv +#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index + +/* + * macros to cache and retrieve an index value in a read/write lock + * as well as the cpu where a reader busy period started + * we use the 2nd word (the debug word) for this, so require the + * debug word to be present + */ +/* + * instrumented rwlock structure -- never used to allocate storage + * only used in macros below to overlay a rwlock_t + */ +typedef struct inst_rwlock_s { + volatile int lock; + unsigned short index; + unsigned short cpu; +} inst_rwlock_t; +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu + +/* + * return true if rwlock is write locked + * (note that other lock attempts can cause the lock value to be negative) + */ +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) (((inst_rwlock_t *)rwlock_ptr)->lock & 1) +#define IABS(x) ((x) > 0 ? (x) : -(x)) + +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) +extern inline int rwlock_readers(rwlock_t *rwlock_ptr) +{ + int tmp = (int) ((inst_rwlock_t *)rwlock_ptr)->lock; + /* readers subtract 2, so we have to: */ + /* - andnot off a possible writer (bit 0) */ + /* - get the absolute value */ + /* - divide by 2 (right shift by one) */ + /* to find the number of readers */ + if (tmp == 0) return(0); + else return(IABS(tmp & ~1)>>1); +} + +#endif /* _ALPHA_LOCKMETER_H */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-alpha/pgtable.h 901-mjb1.1/include/asm-alpha/pgtable.h --- 000-virgin/include/asm-alpha/pgtable.h Tue Apr 8 14:38:20 2003 +++ 901-mjb1.1/include/asm-alpha/pgtable.h Wed Aug 13 20:51:03 2003 @@ -39,6 +39,7 @@ #define PTRS_PER_PMD (1UL << (PAGE_SHIFT-3)) #define PTRS_PER_PGD (1UL << (PAGE_SHIFT-3)) #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) #define FIRST_USER_PGD_NR 0 /* Number of pointers that fit on a page: this will go away. */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-alpha/spinlock.h 901-mjb1.1/include/asm-alpha/spinlock.h --- 000-virgin/include/asm-alpha/spinlock.h Fri May 30 19:02:20 2003 +++ 901-mjb1.1/include/asm-alpha/spinlock.h Wed Aug 13 20:29:36 2003 @@ -6,6 +6,10 @@ #include #include +#ifdef CONFIG_LOCKMETER +#undef DEBUG_SPINLOCK +#undef DEBUG_RWLOCK +#endif /* * Simple spin lock operations. 
There are two variants, one clears IRQ's @@ -95,9 +99,18 @@ static inline int _raw_spin_trylock(spin typedef struct { volatile int write_lock:1, read_counter:31; +#ifdef CONFIG_LOCKMETER + /* required for LOCKMETER since all bits in lock are used */ + /* need this storage for CPU and lock INDEX ............. */ + unsigned magic; +#endif } /*__attribute__((aligned(32)))*/ rwlock_t; +#ifdef CONFIG_LOCKMETER +#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0 } +#else #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 } +#endif #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) #define rwlock_is_locked(x) (*(volatile int *)(x) != 0) @@ -168,5 +181,42 @@ static inline void _raw_read_unlock(rwlo : "=m" (*lock), "=&r" (regx) : "m" (*lock) : "memory"); } + +#ifdef CONFIG_LOCKMETER +static inline int _raw_write_trylock(rwlock_t *lock) +{ + long temp,result; + + __asm__ __volatile__( + " ldl_l %1,%0\n" + " mov $31,%2\n" + " bne %1,1f\n" + " or $31,1,%2\n" + " stl_c %2,%0\n" + "1: mb\n" + : "=m" (*(volatile int *)lock), "=&r" (temp), "=&r" (result) + : "m" (*(volatile int *)lock) + ); + + return (result); +} + +static inline int _raw_read_trylock(rwlock_t *lock) +{ + unsigned long temp,result; + + __asm__ __volatile__( + " ldl_l %1,%0\n" + " mov $31,%2\n" + " blbs %1,1f\n" + " subl %1,2,%2\n" + " stl_c %2,%0\n" + "1: mb\n" + : "=m" (*(volatile int *)lock), "=&r" (temp), "=&r" (result) + : "m" (*(volatile int *)lock) + ); + return (result); +} +#endif /* CONFIG_LOCKMETER */ #endif /* _ALPHA_SPINLOCK_H */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-arm/pgtable.h 901-mjb1.1/include/asm-arm/pgtable.h --- 000-virgin/include/asm-arm/pgtable.h Mon Mar 17 21:43:48 2003 +++ 901-mjb1.1/include/asm-arm/pgtable.h Wed Aug 13 20:51:03 2003 @@ -45,6 +45,7 @@ extern void __pgd_error(const char *file #define FIRST_USER_PGD_NR 1 #define USER_PTRS_PER_PGD ((TASK_SIZE/PGDIR_SIZE) - FIRST_USER_PGD_NR) +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) /* * The table below defines the page protection levels that we insert into our diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-cris/pgtable.h 901-mjb1.1/include/asm-cris/pgtable.h --- 000-virgin/include/asm-cris/pgtable.h Tue Aug 5 19:59:16 2003 +++ 901-mjb1.1/include/asm-cris/pgtable.h Wed Aug 13 20:51:03 2003 @@ -69,6 +69,7 @@ extern void paging_init(void); */ #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) #define FIRST_USER_PGD_NR 0 /* zero page used for uninitialized stuff */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-generic/tlb.h 901-mjb1.1/include/asm-generic/tlb.h --- 000-virgin/include/asm-generic/tlb.h Fri May 30 19:02:20 2003 +++ 901-mjb1.1/include/asm-generic/tlb.h Wed Aug 13 20:51:52 2003 @@ -39,7 +39,6 @@ struct mmu_gather { unsigned int nr; /* set to ~0U means fast mode */ unsigned int need_flush;/* Really unmapped some ptes? */ unsigned int fullmm; /* non-zero means full mm flush */ - unsigned long freed; struct page * pages[FREE_PTE_NR]; }; @@ -60,7 +59,6 @@ tlb_gather_mmu(struct mm_struct *mm, uns tlb->nr = num_online_cpus() > 1 ? 
0U : ~0U; tlb->fullmm = full_mm_flush; - tlb->freed = 0; return tlb; } @@ -85,13 +83,6 @@ tlb_flush_mmu(struct mmu_gather *tlb, un static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - int freed = tlb->freed; - struct mm_struct *mm = tlb->mm; - int rss = mm->rss; - - if (rss < freed) - freed = rss; - mm->rss = rss - freed; tlb_flush_mmu(tlb, start, end); /* keep the page table cache within bounds */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/bug.h 901-mjb1.1/include/asm-i386/bug.h --- 000-virgin/include/asm-i386/bug.h Sat Jun 14 18:37:35 2003 +++ 901-mjb1.1/include/asm-i386/bug.h Wed Aug 13 20:29:29 2003 @@ -9,6 +9,11 @@ * undefined" opcode for parsing in the trap handler. */ +#ifdef CONFIG_X86_REMOTE_DEBUG +#define BUG() do { \ + asm ("int $0x3"); \ +} while (0) +#else #if 1 /* Set to zero for a slightly smaller kernel */ #define BUG() \ __asm__ __volatile__( "ud2\n" \ @@ -17,6 +22,7 @@ : : "i" (__LINE__), "i" (__FILE__)) #else #define BUG() __asm__ __volatile__("ud2\n") +#endif #endif #define BUG_ON(condition) do { if (unlikely((condition)!=0)) BUG(); } while(0) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/cpu.h 901-mjb1.1/include/asm-i386/cpu.h --- 000-virgin/include/asm-i386/cpu.h Sat Jun 14 18:37:35 2003 +++ 901-mjb1.1/include/asm-i386/cpu.h Wed Aug 13 20:48:49 2003 @@ -23,4 +23,6 @@ static inline int arch_register_cpu(int return register_cpu(&cpu_devices[num].cpu, num, parent); } +extern void setup_cpu_idt(void); +extern void setup_node_idts(void); #endif /* _ASM_I386_CPU_H_ */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/desc.h 901-mjb1.1/include/asm-i386/desc.h --- 000-virgin/include/asm-i386/desc.h Tue Feb 25 23:03:50 2003 +++ 901-mjb1.1/include/asm-i386/desc.h Wed Aug 13 20:48:49 2003 @@ -2,6 +2,7 @@ #define __ARCH_DESC_H #include +#include #include #ifndef __ASSEMBLY__ @@ -12,14 +13,15 @@ #include extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES]; +extern struct desc_struct node_idt_table[MAX_NUMNODES][IDT_ENTRIES]; -struct Xgt_desc_struct { +struct Xdt_desc_struct { unsigned short size; unsigned long address __attribute__((packed)); unsigned short pad; } __attribute__ ((packed)); -extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; +extern struct Xdt_desc_struct node_idt_descr[MAX_NUMNODES], cpu_gdt_descr[NR_CPUS]; #define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8)) #define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8)) @@ -29,7 +31,8 @@ extern struct Xgt_desc_struct idt_descr, * something other than this. 
*/ extern struct desc_struct default_ldt[]; -extern void set_intr_gate(unsigned int irq, void * addr); +extern void node_set_intr_gate(unsigned int node, unsigned int vector, void * addr); +extern void set_intr_gate(unsigned int n, void *addr); #define _set_tssldt_desc(n,addr,limit,type) \ __asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/early_printk.h 901-mjb1.1/include/asm-i386/early_printk.h --- 000-virgin/include/asm-i386/early_printk.h Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/include/asm-i386/early_printk.h Wed Aug 13 21:05:43 2003 @@ -0,0 +1,8 @@ +#ifndef __X86_EARLY_PRINTK_H_I386_ +#define __X86_EARLY_PRINTK_H_I386_ + +#define VGABASE 0xB8000 +#define SERIAL_BASES { 0x3f8, 0x2f8 } +#define SERIAL_BASES_LEN 2 + +#endif diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/ioctls.h 901-mjb1.1/include/asm-i386/ioctls.h --- 000-virgin/include/asm-i386/ioctls.h Tue Apr 8 14:38:20 2003 +++ 901-mjb1.1/include/asm-i386/ioctls.h Wed Aug 13 20:29:29 2003 @@ -68,6 +68,7 @@ #define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ #define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ #define FIOQSIZE 0x5460 +#define TIOCGDB 0x547F /* enable GDB stub mode on this tty */ /* Used for packet mode */ #define TIOCPKT_DATA 0 diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/lockmeter.h 901-mjb1.1/include/asm-i386/lockmeter.h --- 000-virgin/include/asm-i386/lockmeter.h Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/include/asm-i386/lockmeter.h Wed Aug 13 20:29:36 2003 @@ -0,0 +1,127 @@ +/* + * Copyright (C) 1999,2000 Silicon Graphics, Inc. + * + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.h by Jack Steiner (steiner@sgi.com) + * + * Modified by Ray Bryant (raybry@us.ibm.com) + * Changes Copyright (C) 2000 IBM, Inc. + * Added save of index in spinlock_t to improve efficiency + * of "hold" time reporting for spinlocks. + * Added support for hold time statistics for read and write + * locks. + * Moved machine dependent code here from include/lockmeter.h. + * + */ + +#ifndef _I386_LOCKMETER_H +#define _I386_LOCKMETER_H + +#include +#include + +#include + +#ifdef __KERNEL__ +extern unsigned long cpu_khz; +#define CPU_CYCLE_FREQUENCY (cpu_khz * 1000) +#else +#define CPU_CYCLE_FREQUENCY 450000000 +#endif + +#define THIS_CPU_NUMBER smp_processor_id() + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0) +#define local_irq_save(x) \ + __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") + +#define local_irq_restore(x) \ + __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory") +#endif /* Linux version 2.2.x */ + +/* + * macros to cache and retrieve an index value inside of a spin lock + * these macros assume that there are less than 65536 simultaneous + * (read mode) holders of a rwlock. Not normally a problem!! + * we also assume that the hash table has less than 65535 entries. 
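+ *
+ * (The lock word is simply overlaid with two 16-bit halves: the low
+ * half is the lock value itself and the high half caches the hash
+ * table index -- see inst_spinlock_t below.)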
+ */ +/* + * instrumented spinlock structure -- never used to allocate storage + * only used in macros below to overlay a spinlock_t + */ +typedef struct inst_spinlock_s { + /* remember, Intel is little endian */ + unsigned short lock; + unsigned short index; +} inst_spinlock_t; +#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv +#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index + +/* + * macros to cache and retrieve an index value in a read/write lock + * as well as the cpu where a reader busy period started + * we use the 2nd word (the debug word) for this, so require the + * debug word to be present + */ +/* + * instrumented rwlock structure -- never used to allocate storage + * only used in macros below to overlay a rwlock_t + */ +typedef struct inst_rwlock_s { + volatile int lock; + unsigned short index; + unsigned short cpu; +} inst_rwlock_t; +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu + +/* + * return the number of readers for a rwlock_t + */ +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) + +extern inline int rwlock_readers(rwlock_t *rwlock_ptr) +{ + int tmp = (int) rwlock_ptr->lock; + /* read and write lock attempts may cause the lock value to temporarily */ + /* be negative. Until it is >= 0 we know nothing (i. e. can't tell if */ + /* is -1 because it was write locked and somebody tried to read lock it */ + /* or if it is -1 because it was read locked and somebody tried to write*/ + /* lock it. ........................................................... */ + do { + tmp = (int) rwlock_ptr->lock; + } while (tmp < 0); + if (tmp == 0) return(0); + else return(RW_LOCK_BIAS-tmp); +} + +/* + * return true if rwlock is write locked + * (note that other lock attempts can cause the lock value to be negative) + */ +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock <= 0) +#define IABS(x) ((x) > 0 ? (x) : -(x)) +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((IABS((rwlock_ptr)->lock) % RW_LOCK_BIAS) != 0) + +/* this is a lot of typing just to get gcc to emit "rdtsc" */ +static inline long long get_cycles64 (void) +{ +#ifndef CONFIG_X86_TSC + #error this code requires CONFIG_X86_TSC +#else + union longlong_u { + long long intlong; + struct intint_s { + uint32_t eax; + uint32_t edx; + } intint; + } longlong; + + rdtsc(longlong.intint.eax,longlong.intint.edx); + return longlong.intlong; +#endif +} + +#endif /* _I386_LOCKMETER_H */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/mach-default/irq_vectors.h 901-mjb1.1/include/asm-i386/mach-default/irq_vectors.h --- 000-virgin/include/asm-i386/mach-default/irq_vectors.h Sun Apr 20 19:35:05 2003 +++ 901-mjb1.1/include/asm-i386/mach-default/irq_vectors.h Wed Aug 13 20:48:49 2003 @@ -68,15 +68,22 @@ #define TIMER_IRQ 0 /* - * 16 8259A IRQ's, 208 potential APIC interrupt sources. - * Right now the APIC is mostly only used for SMP. - * 256 vectors is an architectural limit. (we can have - * more than 256 devices theoretically, but they will - * have to use shared interrupts) + * 16 8259A IRQ's, MAX_IRQ_SOURCES-16 potential APIC + * interrupt sources. Right now the APIC is mostly only + * used for SMP. 256 vectors is an architectural limit. 
+ * (we can have more than 256 devices theoretically, but + * they will have to use shared interrupts) * Since vectors 0x00-0x1f are used/reserved for the CPU, * the usable vector space is 0x20-0xff (224 vectors) + * Linux currently makes 190 vectors available for io interrupts + * starting at FIRST_DEVICE_VECTOR till FIRST_SYSTEM_VECTOR + * + * 0________0x31__________________________0xef_________0xff + * system io interrupts resvd/smp + * */ #ifdef CONFIG_X86_IO_APIC +#define NR_IRQ_VECTORS 190 #define NR_IRQS 224 #else #define NR_IRQS 16 diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/mach-summit/mach_mpparse.h 901-mjb1.1/include/asm-i386/mach-summit/mach_mpparse.h --- 000-virgin/include/asm-i386/mach-summit/mach_mpparse.h Sat Jun 14 18:37:35 2003 +++ 901-mjb1.1/include/asm-i386/mach-summit/mach_mpparse.h Wed Aug 13 20:55:51 2003 @@ -5,6 +5,12 @@ extern int use_cyclone; +#ifdef CONFIG_NUMA +extern void setup_summit(void); +#else /* !CONFIG_NUMA */ +#define setup_summit() {} +#endif /* CONFIG_NUMA */ + static inline void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, struct mpc_config_translation *translation) { @@ -24,6 +30,7 @@ static inline int mps_oem_check(struct m || !strncmp(productid, "EXA", 3) || !strncmp(productid, "RUTHLESS SMP", 12))){ use_cyclone = 1; /*enable cyclone-timer*/ + setup_summit(); return 1; } return 0; @@ -36,6 +43,7 @@ static inline int acpi_madt_oem_check(ch (!strncmp(oem_table_id, "SERVIGIL", 8) || !strncmp(oem_table_id, "EXA", 3))){ use_cyclone = 1; /*enable cyclone-timer*/ + setup_summit(); return 1; } return 0; diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/mmzone.h 901-mjb1.1/include/asm-i386/mmzone.h --- 000-virgin/include/asm-i386/mmzone.h Tue Aug 5 19:59:16 2003 +++ 901-mjb1.1/include/asm-i386/mmzone.h Wed Aug 13 20:51:53 2003 @@ -10,7 +10,49 @@ #ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA + #ifdef CONFIG_X86_NUMAQ + #include + #else /* summit or generic arch */ + #include + #endif +#else /* !CONFIG_NUMA */ + #define get_memcfg_numa get_memcfg_numa_flat + #define get_zholes_size(n) (0) +#endif /* CONFIG_NUMA */ + extern struct pglist_data *node_data[]; +#define NODE_DATA(nid) (node_data[nid]) + +/* + * generic node memory support, the following assumptions apply: + * + * 1) memory comes in 256Mb contigious chunks which are either present or not + * 2) we will not have more than 64Gb in total + * + * for now assume that 64Gb is max amount of RAM for whole system + * 64Gb / 4096bytes/page = 16777216 pages + */ +#define MAX_NR_PAGES 16777216 +#define MAX_ELEMENTS 256 +#define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS) + +extern u8 physnode_map[]; + +static inline int pfn_to_nid(unsigned long pfn) +{ +#ifdef CONFIG_NUMA + return(physnode_map[(pfn) / PAGES_PER_ELEMENT]); +#else + return 0; +#endif +} + +static inline struct pglist_data *pfn_to_pgdat(unsigned long pfn) +{ + return(NODE_DATA(pfn_to_nid(pfn))); +} + /* * Following are macros that are specific to this numa platform. @@ -43,11 +85,6 @@ extern struct pglist_data *node_data[]; */ #define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT) -/* - * Return a pointer to the node data for node n. 
- */ -#define NODE_DATA(nid) (node_data[nid]) - #define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) \ @@ -92,41 +129,6 @@ extern struct pglist_data *node_data[]; * ( pfn_to_pgdat(pfn) && ((pfn) < node_end_pfn(pfn_to_nid(pfn))) ) */ #define pfn_valid(pfn) ((pfn) < num_physpages) - -/* - * generic node memory support, the following assumptions apply: - * - * 1) memory comes in 256Mb contigious chunks which are either present or not - * 2) we will not have more than 64Gb in total - * - * for now assume that 64Gb is max amount of RAM for whole system - * 64Gb / 4096bytes/page = 16777216 pages - */ -#define MAX_NR_PAGES 16777216 -#define MAX_ELEMENTS 256 -#define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS) - -extern u8 physnode_map[]; - -static inline int pfn_to_nid(unsigned long pfn) -{ - return(physnode_map[(pfn) / PAGES_PER_ELEMENT]); -} -static inline struct pglist_data *pfn_to_pgdat(unsigned long pfn) -{ - return(NODE_DATA(pfn_to_nid(pfn))); -} - -#ifdef CONFIG_X86_NUMAQ -#include -#elif CONFIG_NUMA /* summit or generic arch */ -#include -#elif CONFIG_X86_PC -#define get_memcfg_numa get_memcfg_numa_flat -#define get_zholes_size(n) (0) -#else -#define pfn_to_nid(pfn) (0) -#endif /* CONFIG_X86_NUMAQ */ #endif /* CONFIG_DISCONTIGMEM */ #endif /* _ASM_MMZONE_H_ */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/numaq.h 901-mjb1.1/include/asm-i386/numaq.h --- 000-virgin/include/asm-i386/numaq.h Mon Mar 17 21:43:48 2003 +++ 901-mjb1.1/include/asm-i386/numaq.h Wed Aug 13 20:48:49 2003 @@ -29,6 +29,8 @@ #ifdef CONFIG_X86_NUMAQ #define MAX_NUMNODES 8 + +#ifndef __ASSEMBLY__ extern void get_memcfg_numaq(void); #define get_memcfg_numa() get_memcfg_numaq() @@ -161,6 +163,7 @@ static inline unsigned long *get_zholes_ { return 0; } +#endif /* __ASSEMBLY__ */ #endif /* CONFIG_X86_NUMAQ */ #endif /* NUMAQ_H */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/page.h 901-mjb1.1/include/asm-i386/page.h --- 000-virgin/include/asm-i386/page.h Tue Apr 8 14:38:20 2003 +++ 901-mjb1.1/include/asm-i386/page.h Wed Aug 13 20:27:43 2003 @@ -115,9 +115,26 @@ static __inline__ int get_order(unsigned #endif /* __ASSEMBLY__ */ #ifdef __ASSEMBLY__ -#define __PAGE_OFFSET (0xC0000000) +#include +#ifdef CONFIG_05GB +#define __PAGE_OFFSET (0xE0000000) +#elif defined(CONFIG_1GB) +#define __PAGE_OFFSET (0xC0000000) +#elif defined(CONFIG_2GB) +#define __PAGE_OFFSET (0x80000000) +#elif defined(CONFIG_3GB) +#define __PAGE_OFFSET (0x40000000) +#endif #else -#define __PAGE_OFFSET (0xC0000000UL) +#ifdef CONFIG_05GB +#define __PAGE_OFFSET (0xE0000000UL) +#elif defined(CONFIG_1GB) +#define __PAGE_OFFSET (0xC0000000UL) +#elif defined(CONFIG_2GB) +#define __PAGE_OFFSET (0x80000000UL) +#elif defined(CONFIG_3GB) +#define __PAGE_OFFSET (0x40000000UL) +#endif #endif diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/param.h 901-mjb1.1/include/asm-i386/param.h --- 000-virgin/include/asm-i386/param.h Sun Nov 17 20:29:26 2002 +++ 901-mjb1.1/include/asm-i386/param.h Wed Aug 13 20:27:41 2003 @@ -2,10 +2,18 @@ #define _ASMi386_PARAM_H #ifdef __KERNEL__ -# define HZ 1000 /* Internal kernel timer frequency */ -# define USER_HZ 100 /* .. some user interfaces are in "ticks" */ -# define CLOCKS_PER_SEC (USER_HZ) /* like times() */ +#include + +#ifdef CONFIG_1000HZ +# define HZ 1000 /* Internal kernel timer frequency */ +#else +# define HZ 100 #endif + +#define USER_HZ 100 /* .. 
some user interfaces are in "ticks" */ +#define CLOCKS_PER_SEC (USER_HZ) /* like times() */ + +#endif /* __KERNEL__ */ #ifndef HZ #define HZ 100 diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/pgtable.h 901-mjb1.1/include/asm-i386/pgtable.h --- 000-virgin/include/asm-i386/pgtable.h Wed Aug 13 20:24:30 2003 +++ 901-mjb1.1/include/asm-i386/pgtable.h Wed Aug 13 20:51:03 2003 @@ -34,9 +34,11 @@ extern unsigned long empty_zero_page[102 extern pgd_t swapper_pg_dir[1024]; extern kmem_cache_t *pgd_cache; extern kmem_cache_t *pmd_cache; +extern kmem_cache_t *kernel_pmd_cache; extern spinlock_t pgd_lock; extern struct list_head pgd_list; +void kernel_pmd_ctor(void *, kmem_cache_t *, unsigned long); void pmd_ctor(void *, kmem_cache_t *, unsigned long); void pgd_ctor(void *, kmem_cache_t *, unsigned long); void pgd_dtor(void *, kmem_cache_t *, unsigned long); @@ -63,7 +65,22 @@ void paging_init(void); #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define __USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define FIRST_KERNEL_PGD_PTR (__USER_PTRS_PER_PGD) +#define PARTIAL_PGD (TASK_SIZE > __USER_PTRS_PER_PGD*PGDIR_SIZE ? 1 : 0) +#define PARTIAL_PMD ((TASK_SIZE % PGDIR_SIZE)/PMD_SIZE) +#define USER_PTRS_PER_PGD (PARTIAL_PGD + __USER_PTRS_PER_PGD) +#ifndef __ASSEMBLY__ +static inline int USER_PTRS_PER_PMD(int pgd_index) { + if (pgd_index < __USER_PTRS_PER_PGD) + return PTRS_PER_PMD; + else if (PARTIAL_PMD && (pgd_index == __USER_PTRS_PER_PGD)) + return (PTRS_PER_PMD-PARTIAL_PMD); + else + return 0; +} +#endif + #define FIRST_USER_PGD_NR 0 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/posix_types.h 901-mjb1.1/include/asm-i386/posix_types.h --- 000-virgin/include/asm-i386/posix_types.h Sun Apr 20 19:35:05 2003 +++ 901-mjb1.1/include/asm-i386/posix_types.h Wed Aug 13 20:48:46 2003 @@ -7,7 +7,7 @@ * assume GCC is being used. */ -typedef unsigned short __kernel_dev_t; +typedef unsigned long __kernel_dev_t; typedef unsigned long __kernel_ino_t; typedef unsigned short __kernel_mode_t; typedef unsigned short __kernel_nlink_t; diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/processor.h 901-mjb1.1/include/asm-i386/processor.h --- 000-virgin/include/asm-i386/processor.h Tue Jun 24 21:29:24 2003 +++ 901-mjb1.1/include/asm-i386/processor.h Wed Aug 13 20:29:29 2003 @@ -288,7 +288,11 @@ extern unsigned int mca_pentium_flag; /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ +#ifdef CONFIG_05GB +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 16)) +#else #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) +#endif /* * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. 
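
As an aside on the CONFIG_05GB/1GB/2GB/3GB split options introduced in the page.h and processor.h hunks above, the small user-space sketch below reproduces the resulting mmap search base. It is an illustration only: it assumes the stock i386 identities TASK_SIZE == PAGE_OFFSET == __PAGE_OFFSET, 4k pages, and a PAGE_ALIGN() that rounds up, none of which appears in the hunks themselves.

#include <stdio.h>

int main(void)
{
	/* __PAGE_OFFSET choices from the page.h hunk above */
	unsigned long page_offset[] = { 0xE0000000UL, 0xC0000000UL,
					0x80000000UL, 0x40000000UL };
	const char *cfg[] = { "CONFIG_05GB", "CONFIG_1GB",
			      "CONFIG_2GB", "CONFIG_3GB" };
	int i;

	for (i = 0; i < 4; i++) {
		/* assumption: TASK_SIZE == PAGE_OFFSET on i386 */
		unsigned long task_size = page_offset[i];
		/* TASK_SIZE/16 only under CONFIG_05GB, else TASK_SIZE/3 */
		unsigned long base = task_size / (i == 0 ? 16 : 3);

		base = (base + 4095UL) & ~4095UL;	/* PAGE_ALIGN(), 4k pages */
		printf("%-12s user space ends at 0x%08lx, mmap base 0x%08lx\n",
		       cfg[i], task_size, base);
	}
	return 0;
}

With CONFIG_05GB the mapped-file area starts at 0x0E000000, leaving most of the enlarged (3.5GB) user address space above it for large mappings; the default CONFIG_1GB case keeps the familiar 0x40000000 base.
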
@@ -406,6 +410,9 @@ struct thread_struct { unsigned int saved_fs, saved_gs; /* IO permissions */ unsigned long *ts_io_bitmap; +#ifdef CONFIG_X86_REMOTE_DEBUG + struct pt_regs *kgdbregs; +#endif }; #define INIT_THREAD { \ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/rwlock.h 901-mjb1.1/include/asm-i386/rwlock.h --- 000-virgin/include/asm-i386/rwlock.h Sun Nov 17 20:29:57 2002 +++ 901-mjb1.1/include/asm-i386/rwlock.h Wed Aug 13 20:29:33 2003 @@ -20,28 +20,52 @@ #define RW_LOCK_BIAS 0x01000000 #define RW_LOCK_BIAS_STR "0x01000000" -#define __build_read_lock_ptr(rw, helper) \ - asm volatile(LOCK "subl $1,(%0)\n\t" \ - "js 2f\n" \ - "1:\n" \ - LOCK_SECTION_START("") \ - "2:\tcall " helper "\n\t" \ - "jmp 1b\n" \ - LOCK_SECTION_END \ - ::"a" (rw) : "memory") - -#define __build_read_lock_const(rw, helper) \ - asm volatile(LOCK "subl $1,%0\n\t" \ - "js 2f\n" \ - "1:\n" \ - LOCK_SECTION_START("") \ - "2:\tpushl %%eax\n\t" \ - "leal %0,%%eax\n\t" \ - "call " helper "\n\t" \ - "popl %%eax\n\t" \ - "jmp 1b\n" \ - LOCK_SECTION_END \ - :"=m" (*(volatile int *)rw) : : "memory") +#ifdef CONFIG_SPINLINE + + #define __build_read_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $1,(%0)\n\t" \ + "jns 1f\n\t" \ + "call " helper "\n\t" \ + "1:\t" \ + ::"a" (rw) : "memory") + + #define __build_read_lock_const(rw, helper) \ + asm volatile(LOCK "subl $1,%0\n\t" \ + "jns 1f\n\t" \ + "pushl %%eax\n\t" \ + "leal %0,%%eax\n\t" \ + "call " helper "\n\t" \ + "popl %%eax\n\t" \ + "1:\t" \ + :"=m" (*(volatile int *)rw) : : "memory") + +#else /* !CONFIG_SPINLINE */ + + #define __build_read_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $1,(%0)\n\t" \ + "js 2f\n" \ + "1:\n" \ + LOCK_SECTION_START("") \ + "2:\tcall " helper "\n\t" \ + "jmp 1b\n" \ + LOCK_SECTION_END \ + ::"a" (rw) : "memory") + + #define __build_read_lock_const(rw, helper) \ + asm volatile(LOCK "subl $1,%0\n\t" \ + "js 2f\n" \ + "1:\n" \ + LOCK_SECTION_START("") \ + "2:\tpushl %%eax\n\t" \ + "leal %0,%%eax\n\t" \ + "call " helper "\n\t" \ + "popl %%eax\n\t" \ + "jmp 1b\n" \ + LOCK_SECTION_END \ + :"=m" (*(volatile int *)rw) : : "memory") + +#endif /* CONFIG_SPINLINE */ + #define __build_read_lock(rw, helper) do { \ if (__builtin_constant_p(rw)) \ @@ -50,28 +74,51 @@ __build_read_lock_ptr(rw, helper); \ } while (0) -#define __build_write_lock_ptr(rw, helper) \ - asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ - "jnz 2f\n" \ - "1:\n" \ - LOCK_SECTION_START("") \ - "2:\tcall " helper "\n\t" \ - "jmp 1b\n" \ - LOCK_SECTION_END \ - ::"a" (rw) : "memory") - -#define __build_write_lock_const(rw, helper) \ - asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \ - "jnz 2f\n" \ - "1:\n" \ - LOCK_SECTION_START("") \ - "2:\tpushl %%eax\n\t" \ - "leal %0,%%eax\n\t" \ - "call " helper "\n\t" \ - "popl %%eax\n\t" \ - "jmp 1b\n" \ - LOCK_SECTION_END \ - :"=m" (*(volatile int *)rw) : : "memory") +#ifdef CONFIG_SPINLINE + + #define __build_write_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ + "jz 1f\n\t" \ + "call " helper "\n\t" \ + "1:\n" \ + ::"a" (rw) : "memory") + + #define __build_write_lock_const(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \ + "jz 1f\n\t" \ + "pushl %%eax\n\t" \ + "leal %0,%%eax\n\t" \ + "call " helper "\n\t" \ + "popl %%eax\n\t" \ + "1:\n" \ + :"=m" (*(volatile int *)rw) : : "memory") + +#else /* !CONFIG_SPINLINE */ + + #define __build_write_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ + "jnz 2f\n" \ + "1:\n" \ + 
LOCK_SECTION_START("") \ + "2:\tcall " helper "\n\t" \ + "jmp 1b\n" \ + LOCK_SECTION_END \ + ::"a" (rw) : "memory") + + #define __build_write_lock_const(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \ + "jnz 2f\n" \ + "1:\n" \ + LOCK_SECTION_START("") \ + "2:\tpushl %%eax\n\t" \ + "leal %0,%%eax\n\t" \ + "call " helper "\n\t" \ + "popl %%eax\n\t" \ + "jmp 1b\n" \ + LOCK_SECTION_END \ + :"=m" (*(volatile int *)rw) : : "memory") + +#endif /* CONFIG_SPINLINE */ #define __build_write_lock(rw, helper) do { \ if (__builtin_constant_p(rw)) \ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/segment.h 901-mjb1.1/include/asm-i386/segment.h --- 000-virgin/include/asm-i386/segment.h Tue Feb 25 23:03:50 2003 +++ 901-mjb1.1/include/asm-i386/segment.h Wed Aug 13 20:48:49 2003 @@ -94,5 +94,5 @@ * of tasks we can have.. */ #define IDT_ENTRIES 256 - +#define IDT_SIZE (IDT_ENTRIES * 8) #endif diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/spinlock.h 901-mjb1.1/include/asm-i386/spinlock.h --- 000-virgin/include/asm-i386/spinlock.h Fri May 30 19:02:20 2003 +++ 901-mjb1.1/include/asm-i386/spinlock.h Wed Aug 13 20:29:36 2003 @@ -43,18 +43,35 @@ typedef struct { #define spin_is_locked(x) (*(volatile signed char *)(&(x)->lock) <= 0) #define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) -#define spin_lock_string \ - "\n1:\t" \ - "lock ; decb %0\n\t" \ - "js 2f\n" \ - LOCK_SECTION_START("") \ - "2:\t" \ - "rep;nop\n\t" \ - "cmpb $0,%0\n\t" \ - "jle 2b\n\t" \ - "jmp 1b\n" \ - LOCK_SECTION_END +#ifdef CONFIG_SPINLINE + #define spin_lock_string \ + "\n1:\t" \ + "lock ; decb %0\n\t" \ + "js 2f\n" \ + "jmp 3f\n" \ + "2:\t" \ + "rep;nop\n\t" \ + "cmpb $0,%0\n\t" \ + "jle 2b\n\t" \ + "jmp 1b\n" \ + "3:\t" + +#else /* !CONFIG_SPINLINE */ + + #define spin_lock_string \ + "\n1:\t" \ + "lock ; decb %0\n\t" \ + "js 2f\n" \ + LOCK_SECTION_START("") \ + "2:\t" \ + "rep;nop\n\t" \ + "cmpb $0,%0\n\t" \ + "jle 2b\n\t" \ + "jmp 1b\n" \ + LOCK_SECTION_END + +#endif /* CONFIG_SPINLINE */ /* * This works. Despite all the confusion. 
* (except on PPro SMP or if we are using OOSTORE) @@ -138,6 +155,11 @@ here: */ typedef struct { volatile unsigned int lock; +#if CONFIG_LOCKMETER + /* required for LOCKMETER since all bits in lock are used */ + /* and we need this storage for CPU and lock INDEX */ + unsigned lockmeter_magic; +#endif #ifdef CONFIG_DEBUG_SPINLOCK unsigned magic; #endif @@ -145,11 +167,19 @@ typedef struct { #define RWLOCK_MAGIC 0xdeaf1eed +#ifdef CONFIG_LOCKMETER +#if CONFIG_DEBUG_SPINLOCK +#define RWLOCK_MAGIC_INIT , 0, RWLOCK_MAGIC +#else +#define RWLOCK_MAGIC_INIT , 0 +#endif +#else /* !CONFIG_LOCKMETER */ #ifdef CONFIG_DEBUG_SPINLOCK #define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC #else #define RWLOCK_MAGIC_INIT /* */ #endif +#endif /* !CONFIG_LOCKMETER */ #define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } @@ -195,5 +225,59 @@ static inline int _raw_write_trylock(rwl atomic_add(RW_LOCK_BIAS, count); return 0; } + +#ifdef CONFIG_LOCKMETER +static inline int _raw_read_trylock(rwlock_t *lock) +{ +/* FIXME -- replace with assembler */ + atomic_t *count = (atomic_t *)lock; + atomic_dec(count); + if (count->counter > 0) + return 1; + atomic_inc(count); + return 0; +} +#endif + +#if defined(CONFIG_LOCKMETER) && defined(CONFIG_HAVE_DEC_LOCK) +extern void _metered_spin_lock (spinlock_t *lock); +extern void _metered_spin_unlock(spinlock_t *lock); + +/* + * Matches what is in arch/i386/lib/dec_and_lock.c, except this one is + * "static inline" so that the spin_lock(), if actually invoked, is charged + * against the real caller, not against the catch-all atomic_dec_and_lock + */ +static inline int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +{ + int counter; + int newcount; + +repeat: + counter = atomic_read(atomic); + newcount = counter-1; + + if (!newcount) + goto slow_path; + + asm volatile("lock; cmpxchgl %1,%2" + :"=a" (newcount) + :"r" (newcount), "m" (atomic->counter), "0" (counter)); + + /* If the above failed, "eax" will have changed */ + if (newcount != counter) + goto repeat; + return 0; + +slow_path: + _metered_spin_lock(lock); + if (atomic_dec_and_test(atomic)) + return 1; + _metered_spin_unlock(lock); + return 0; +} + +#define ATOMIC_DEC_AND_LOCK +#endif #endif /* __ASM_SPINLOCK_H */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/srat.h 901-mjb1.1/include/asm-i386/srat.h --- 000-virgin/include/asm-i386/srat.h Mon Mar 17 21:43:48 2003 +++ 901-mjb1.1/include/asm-i386/srat.h Wed Aug 13 20:48:49 2003 @@ -28,8 +28,9 @@ #define _ASM_SRAT_H_ #define MAX_NUMNODES 8 +#ifndef __ASSEMBLY__ extern void get_memcfg_from_srat(void); extern unsigned long *get_zholes_size(int); #define get_memcfg_numa() get_memcfg_from_srat() - +#endif #endif /* _ASM_SRAT_H_ */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-i386/unistd.h 901-mjb1.1/include/asm-i386/unistd.h --- 000-virgin/include/asm-i386/unistd.h Tue Aug 5 20:01:43 2003 +++ 901-mjb1.1/include/asm-i386/unistd.h Wed Aug 13 20:51:50 2003 @@ -228,7 +228,7 @@ #define __NR_madvise1 219 /* delete when C lib stub is removed */ #define __NR_getdents64 220 #define __NR_fcntl64 221 -/* 223 is unused */ +#define __NR_mbind 223 #define __NR_gettid 224 #define __NR_readahead 225 #define __NR_setxattr 226 diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-ia64/lockmeter.h 901-mjb1.1/include/asm-ia64/lockmeter.h --- 000-virgin/include/asm-ia64/lockmeter.h Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/include/asm-ia64/lockmeter.h Wed Aug 13 20:29:36 2003 @@ -0,0 +1,72 @@ +/* + * Copyright (C) 1999,2000 Silicon 
Graphics, Inc. + * + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.h by Jack Steiner (steiner@sgi.com) + */ + +#ifndef _IA64_LOCKMETER_H +#define _IA64_LOCKMETER_H + +#ifdef local_cpu_data +#define CPU_CYCLE_FREQUENCY local_cpu_data->itc_freq +#else +#define CPU_CYCLE_FREQUENCY my_cpu_data.itc_freq +#endif +#define get_cycles64() get_cycles() + +#define THIS_CPU_NUMBER smp_processor_id() + +/* + * macros to cache and retrieve an index value inside of a lock + * these macros assume that there are less than 65536 simultaneous + * (read mode) holders of a rwlock. + * we also assume that the hash table has less than 32767 entries. + */ +/* + * instrumented spinlock structure -- never used to allocate storage + * only used in macros below to overlay a spinlock_t + */ +typedef struct inst_spinlock_s { + /* remember, Intel is little endian */ + volatile unsigned short lock; + volatile unsigned short index; +} inst_spinlock_t; +#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv +#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index + +/* + * macros to cache and retrieve an index value in a read/write lock + * as well as the cpu where a reader busy period started + * we use the 2nd word (the debug word) for this, so require the + * debug word to be present + */ +/* + * instrumented rwlock structure -- never used to allocate storage + * only used in macros below to overlay a rwlock_t + */ +typedef struct inst_rwlock_s { + volatile int read_counter:31; + volatile int write_lock:1; + volatile unsigned short index; + volatile unsigned short cpu; +} inst_rwlock_t; +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu + +/* + * return the number of readers for a rwlock_t + */ +#define RWLOCK_READERS(rwlock_ptr) ((rwlock_ptr)->read_counter) + +/* + * return true if rwlock is write locked + * (note that other lock attempts can cause the lock value to be negative) + */ +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->write_lock) +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((rwlock_ptr)->read_counter) + +#endif /* _IA64_LOCKMETER_H */ + diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-ia64/pgtable.h 901-mjb1.1/include/asm-ia64/pgtable.h --- 000-virgin/include/asm-ia64/pgtable.h Tue Jun 24 21:29:24 2003 +++ 901-mjb1.1/include/asm-ia64/pgtable.h Wed Aug 13 20:51:03 2003 @@ -92,6 +92,7 @@ #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PTRS_PER_PGD (__IA64_UL(1) << (PAGE_SHIFT-3)) #define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */ +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) #define FIRST_USER_PGD_NR 0 /* diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-ia64/spinlock.h 901-mjb1.1/include/asm-ia64/spinlock.h --- 000-virgin/include/asm-ia64/spinlock.h Tue Aug 5 20:01:54 2003 +++ 901-mjb1.1/include/asm-ia64/spinlock.h Wed Aug 13 20:29:36 2003 @@ -153,4 +153,25 @@ do { \ clear_bit(31, (x)); \ }) +#ifdef CONFIG_LOCKMETER +extern void _metered_spin_lock (spinlock_t *lock); +extern void _metered_spin_unlock(spinlock_t *lock); + +/* + * Use a less efficient, and inline, atomic_dec_and_lock() if lockmetering + * so we can see the callerPC of who is actually doing the spin_lock(). 
+ * Otherwise, all we see is the generic rollup of all locks done by + * atomic_dec_and_lock(). + */ +static inline int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +{ + _metered_spin_lock(lock); + if (atomic_dec_and_test(atomic)) + return 1; + _metered_spin_unlock(lock); + return 0; +} +#define ATOMIC_DEC_AND_LOCK +#endif + #endif /* _ASM_IA64_SPINLOCK_H */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-m68k/pgtable.h 901-mjb1.1/include/asm-m68k/pgtable.h --- 000-virgin/include/asm-m68k/pgtable.h Sat Jun 14 18:37:35 2003 +++ 901-mjb1.1/include/asm-m68k/pgtable.h Wed Aug 13 20:51:03 2003 @@ -58,6 +58,7 @@ #define PTRS_PER_PGD 128 #endif #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) #define FIRST_USER_PGD_NR 0 /* Virtual address region for use by kernel_map() */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-mips/lockmeter.h 901-mjb1.1/include/asm-mips/lockmeter.h --- 000-virgin/include/asm-mips/lockmeter.h Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/include/asm-mips/lockmeter.h Wed Aug 13 20:29:36 2003 @@ -0,0 +1,126 @@ +/* + * Copyright (C) 1999,2000 Silicon Graphics, Inc. + * + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.h by Jack Steiner (steiner@sgi.com) + * Ported to mips32 for Asita Technologies + * by D.J. Barrow ( dj.barrow@asitatechnologies.com ) + */ +#ifndef _ASM_LOCKMETER_H +#define _ASM_LOCKMETER_H + +/* do_gettimeoffset is a function pointer on mips */ +/* & it is not included by */ +#include +#include +#include + +#define SPINLOCK_MAGIC_INIT /* */ + +#define CPU_CYCLE_FREQUENCY get_cpu_cycle_frequency() + +#define THIS_CPU_NUMBER smp_processor_id() + +static uint32_t cpu_cycle_frequency = 0; + +static uint32_t get_cpu_cycle_frequency(void) +{ + /* a total hack, slow and invasive, but ... it works */ + int sec; + uint32_t start_cycles; + struct timeval tv; + + if (cpu_cycle_frequency == 0) { /* uninitialized */ + do_gettimeofday(&tv); + sec = tv.tv_sec; /* set up to catch the tv_sec rollover */ + while (sec == tv.tv_sec) { do_gettimeofday(&tv); } + sec = tv.tv_sec; /* rolled over to a new sec value */ + start_cycles = get_cycles(); + while (sec == tv.tv_sec) { do_gettimeofday(&tv); } + cpu_cycle_frequency = get_cycles() - start_cycles; + } + + return cpu_cycle_frequency; +} + +extern struct timeval xtime; + +static uint64_t get_cycles64(void) +{ + static uint64_t last_get_cycles64 = 0; + uint64_t ret; + unsigned long sec; + unsigned long usec, usec_offset; + +again: + sec = xtime.tv_sec; + usec = xtime.tv_usec; + usec_offset = do_gettimeoffset(); + if ((xtime.tv_sec != sec) || + (xtime.tv_usec != usec)|| + (usec_offset >= 20000)) + goto again; + + ret = ((uint64_t)(usec + usec_offset) * cpu_cycle_frequency); + /* We can't do a normal 64 bit division on mips without libgcc.a */ + do_div(ret,1000000); + ret += ((uint64_t)sec * cpu_cycle_frequency); + + /* XXX why does time go backwards? do_gettimeoffset? general time adj? */ + if (ret <= last_get_cycles64) + ret = last_get_cycles64+1; + last_get_cycles64 = ret; + + return ret; +} + +/* + * macros to cache and retrieve an index value inside of a lock + * these macros assume that there are less than 65536 simultaneous + * (read mode) holders of a rwlock. + * we also assume that the hash table has less than 32767 entries. 
+ * the high order bit is used for write locking a rw_lock + */ +#define INDEX_MASK 0x7FFF0000 +#define READERS_MASK 0x0000FFFF +#define INDEX_SHIFT 16 +#define PUT_INDEX(lockp,index) \ + lockp->lock = (((lockp->lock) & ~INDEX_MASK) | (index) << INDEX_SHIFT) +#define GET_INDEX(lockp) \ + (((lockp->lock) & INDEX_MASK) >> INDEX_SHIFT) + +/* + * macros to cache and retrieve an index value in a read/write lock + * as well as the cpu where a reader busy period started + * we use the 2nd word (the debug word) for this, so require the + * debug word to be present + */ +/* + * instrumented rwlock structure -- never used to allocate storage + * only used in macros below to overlay a rwlock_t + */ +typedef struct inst_rwlock_s { + volatile int lock; + unsigned short index; + unsigned short cpu; +} inst_rwlock_t; +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu + +/* + * return the number of readers for a rwlock_t + */ +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) + +extern inline int rwlock_readers(rwlock_t *rwlock_ptr) +{ + int tmp = (int) rwlock_ptr->lock; + return (tmp >= 0) ? tmp : 0; +} + +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock < 0) +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock > 0) + +#endif /* _ASM_LOCKMETER_H */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-mips/spinlock.h 901-mjb1.1/include/asm-mips/spinlock.h --- 000-virgin/include/asm-mips/spinlock.h Wed Jul 2 21:59:13 2003 +++ 901-mjb1.1/include/asm-mips/spinlock.h Wed Aug 13 20:29:36 2003 @@ -91,9 +91,18 @@ static inline unsigned int _raw_spin_try typedef struct { volatile unsigned int lock; +#if CONFIG_LOCKMETER + /* required for LOCKMETER since all bits in lock are used */ + /* and we need this storage for CPU and lock INDEX */ + unsigned lockmeter_magic; +#endif } rwlock_t; +#ifdef CONFIG_LOCKMETER +#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 } +#else #define RW_LOCK_UNLOCKED (rwlock_t) { 0 } +#endif #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-mips64/lockmeter.h 901-mjb1.1/include/asm-mips64/lockmeter.h --- 000-virgin/include/asm-mips64/lockmeter.h Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/include/asm-mips64/lockmeter.h Wed Aug 13 20:29:36 2003 @@ -0,0 +1,120 @@ +/* + * Copyright (C) 1999,2000 Silicon Graphics, Inc. + * + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.h by Jack Steiner (steiner@sgi.com) + */ + +#ifndef _ASM_LOCKMETER_H +#define _ASM_LOCKMETER_H + +#include + +#define SPINLOCK_MAGIC_INIT /* */ + +#define CPU_CYCLE_FREQUENCY get_cpu_cycle_frequency() + +#define THIS_CPU_NUMBER smp_processor_id() + +static uint32_t cpu_cycle_frequency = 0; + +static uint32_t get_cpu_cycle_frequency(void) +{ + /* a total hack, slow and invasive, but ... 
it works */ + int sec; + uint32_t start_cycles; + struct timeval tv; + + if (cpu_cycle_frequency == 0) { /* uninitialized */ + do_gettimeofday(&tv); + sec = tv.tv_sec; /* set up to catch the tv_sec rollover */ + while (sec == tv.tv_sec) { do_gettimeofday(&tv); } + sec = tv.tv_sec; /* rolled over to a new sec value */ + start_cycles = get_cycles(); + while (sec == tv.tv_sec) { do_gettimeofday(&tv); } + cpu_cycle_frequency = get_cycles() - start_cycles; + } + + return cpu_cycle_frequency; +} + +extern struct timeval xtime; +extern long do_gettimeoffset(void); + +static uint64_t get_cycles64(void) +{ + static uint64_t last_get_cycles64 = 0; + uint64_t ret; + unsigned long sec; + unsigned long usec, usec_offset; + +again: + sec = xtime.tv_sec; + usec = xtime.tv_usec; + usec_offset = do_gettimeoffset(); + if ((xtime.tv_sec != sec) || + (xtime.tv_usec != usec)|| + (usec_offset >= 20000)) + goto again; + + ret = ((uint64_t)sec * cpu_cycle_frequency) + + ( ((uint64_t)(usec + usec_offset) * cpu_cycle_frequency) / 1000000 ); + + /* XXX why does time go backwards? do_gettimeoffset? general time adj? */ + if (ret <= last_get_cycles64) + ret = last_get_cycles64+1; + last_get_cycles64 = ret; + + return ret; +} + +/* + * macros to cache and retrieve an index value inside of a lock + * these macros assume that there are less than 65536 simultaneous + * (read mode) holders of a rwlock. + * we also assume that the hash table has less than 32767 entries. + * the high order bit is used for write locking a rw_lock + */ +#define INDEX_MASK 0x7FFF0000 +#define READERS_MASK 0x0000FFFF +#define INDEX_SHIFT 16 +#define PUT_INDEX(lockp,index) \ + lockp->lock = (((lockp->lock) & ~INDEX_MASK) | (index) << INDEX_SHIFT) +#define GET_INDEX(lockp) \ + (((lockp->lock) & INDEX_MASK) >> INDEX_SHIFT) + +/* + * macros to cache and retrieve an index value in a read/write lock + * as well as the cpu where a reader busy period started + * we use the 2nd word (the debug word) for this, so require the + * debug word to be present + */ +/* + * instrumented rwlock structure -- never used to allocate storage + * only used in macros below to overlay a rwlock_t + */ +typedef struct inst_rwlock_s { + volatile int lock; + unsigned short index; + unsigned short cpu; +} inst_rwlock_t; +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu + +/* + * return the number of readers for a rwlock_t + */ +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) + +extern inline int rwlock_readers(rwlock_t *rwlock_ptr) +{ + int tmp = (int) rwlock_ptr->lock; + return (tmp >= 0) ? 
tmp : 0; +} + +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock < 0) +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock > 0) + +#endif /* _ASM_LOCKMETER_H */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-parisc/pgtable.h 901-mjb1.1/include/asm-parisc/pgtable.h --- 000-virgin/include/asm-parisc/pgtable.h Tue Aug 5 20:01:43 2003 +++ 901-mjb1.1/include/asm-parisc/pgtable.h Wed Aug 13 20:51:19 2003 @@ -81,6 +81,7 @@ #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PTRS_PER_PGD (1UL << (PAGE_SHIFT - PT_NLEVELS)) #define USER_PTRS_PER_PGD PTRS_PER_PGD +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) /* Definitions for 2nd level */ #define pgtable_cache_init() do { } while (0) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-ppc/pgtable.h 901-mjb1.1/include/asm-ppc/pgtable.h --- 000-virgin/include/asm-ppc/pgtable.h Sat Jun 14 18:37:36 2003 +++ 901-mjb1.1/include/asm-ppc/pgtable.h Wed Aug 13 20:51:19 2003 @@ -83,6 +83,7 @@ extern unsigned long ioremap_bot, iorema #define PTRS_PER_PMD 1 #define PTRS_PER_PGD 1024 #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) #define FIRST_USER_PGD_NR 0 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-ppc64/pgtable.h 901-mjb1.1/include/asm-ppc64/pgtable.h --- 000-virgin/include/asm-ppc64/pgtable.h Sat Jun 14 18:37:36 2003 +++ 901-mjb1.1/include/asm-ppc64/pgtable.h Wed Aug 13 20:51:19 2003 @@ -36,6 +36,7 @@ #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) #define USER_PTRS_PER_PGD (1024) +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) #define FIRST_USER_PGD_NR 0 #define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-sh/pgtable.h 901-mjb1.1/include/asm-sh/pgtable.h --- 000-virgin/include/asm-sh/pgtable.h Wed Jul 2 21:59:15 2003 +++ 901-mjb1.1/include/asm-sh/pgtable.h Wed Aug 13 20:51:19 2003 @@ -41,6 +41,7 @@ extern unsigned long empty_zero_page[102 #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) #define FIRST_USER_PGD_NR 0 #define PTE_PHYS_MASK 0x1ffff000 diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-sparc/pgtable.h 901-mjb1.1/include/asm-sparc/pgtable.h --- 000-virgin/include/asm-sparc/pgtable.h Sat May 10 18:35:03 2003 +++ 901-mjb1.1/include/asm-sparc/pgtable.h Wed Aug 13 20:51:19 2003 @@ -125,6 +125,7 @@ BTFIXUPDEF_INT(page_kernel) #define PTRS_PER_PMD BTFIXUP_SIMM13(ptrs_per_pmd) #define PTRS_PER_PGD BTFIXUP_SIMM13(ptrs_per_pgd) #define USER_PTRS_PER_PGD BTFIXUP_SIMM13(user_ptrs_per_pgd) +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) #define FIRST_USER_PGD_NR 0 #define PAGE_NONE __pgprot(BTFIXUP_INT(page_none)) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-sparc64/lockmeter.h 901-mjb1.1/include/asm-sparc64/lockmeter.h --- 000-virgin/include/asm-sparc64/lockmeter.h Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/include/asm-sparc64/lockmeter.h Wed Aug 13 20:29:36 2003 @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2000 Anton Blanchard (anton@linuxcare.com) + */ + +#ifndef _SPARC64_LOCKMETER_H +#define _SPARC64_LOCKMETER_H + +#include + +#include + +extern unsigned long cpu_hz; +#define CPU_CYCLE_FREQUENCY cpu_hz + +#define THIS_CPU_NUMBER __cpu_number_map[smp_processor_id()] + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0) +#define local_irq_save(x) __save_and_cli(x) +#define local_irq_restore(x) __restore_flags(x) +#endif /* Linux version 2.2.x 
*/ + +#define PUT_INDEX(lock_ptr,indexv) (lock_ptr)->index = (indexv) +#define GET_INDEX(lock_ptr) (lock_ptr)->index + +#define PUT_RWINDEX(rwlock_ptr,indexv) (rwlock_ptr)->index = (indexv) +#define GET_RWINDEX(rwlock_ptr) (rwlock_ptr)->index +#define PUT_RW_CPU(rwlock_ptr,cpuv) (rwlock_ptr)->cpu = (cpuv) +#define GET_RW_CPU(rwlock_ptr) (rwlock_ptr)->cpu + +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) + +extern inline int rwlock_readers(rwlock_t *rwlock_ptr) +{ + signed int tmp = rwlock_ptr->lock; + + if (tmp > 0) + return tmp; + else + return 0; +} + +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((signed int)((rwlock_ptr)->lock) < 0) +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((signed int)((rwlock_ptr)->lock) > 0) + +#define get_cycles64() get_cycles() + +#endif /* _SPARC64_LOCKMETER_H */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-sparc64/pgtable.h 901-mjb1.1/include/asm-sparc64/pgtable.h --- 000-virgin/include/asm-sparc64/pgtable.h Wed Mar 26 22:54:37 2003 +++ 901-mjb1.1/include/asm-sparc64/pgtable.h Wed Aug 13 20:51:19 2003 @@ -93,6 +93,7 @@ /* Kernel has a separate 44bit address space. */ #define USER_PTRS_PER_PGD ((const int)(test_thread_flag(TIF_32BIT)) ? \ (1) : (PTRS_PER_PGD)) +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) #define FIRST_USER_PGD_NR 0 #define pte_ERROR(e) __builtin_trap() diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-sparc64/spinlock.h 901-mjb1.1/include/asm-sparc64/spinlock.h --- 000-virgin/include/asm-sparc64/spinlock.h Sun Nov 17 20:29:27 2002 +++ 901-mjb1.1/include/asm-sparc64/spinlock.h Wed Aug 13 20:29:36 2003 @@ -30,15 +30,23 @@ #ifndef CONFIG_DEBUG_SPINLOCK -typedef unsigned char spinlock_t; -#define SPIN_LOCK_UNLOCKED 0 +typedef struct { + unsigned char lock; + unsigned int index; +} spinlock_t; -#define spin_lock_init(lock) (*((unsigned char *)(lock)) = 0) -#define spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) +#ifdef CONFIG_LOCKMETER +#define SPIN_LOCK_UNLOCKED (spinlock_t) {0, 0} +#else +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } +#endif + +#define spin_lock_init(__lock) do { *(__lock) = SPIN_LOCK_UNLOCKED; } while(0) +#define spin_is_locked(__lock) (*((volatile unsigned char *)(&((__lock)->lock))) != 0) -#define spin_unlock_wait(lock) \ +#define spin_unlock_wait(__lock) \ do { membar("#LoadLoad"); \ -} while(*((volatile unsigned char *)lock)) +} while(*((volatile unsigned char *)(&(((spinlock_t *)__lock)->lock)))) static __inline__ void _raw_spin_lock(spinlock_t *lock) { @@ -109,8 +117,20 @@ extern int _spin_trylock (spinlock_t *lo #ifndef CONFIG_DEBUG_SPINLOCK -typedef unsigned int rwlock_t; -#define RW_LOCK_UNLOCKED 0 +#ifdef CONFIG_LOCKMETER +typedef struct { + unsigned int lock; + unsigned int index; + unsigned int cpu; +} rwlock_t; +#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0xff } +#else +typedef struct { + unsigned int lock; +} rwlock_t; +#define RW_LOCK_UNLOCKED (rwlock_t) { 0 } +#endif + #define rwlock_init(lp) do { *(lp) = RW_LOCK_UNLOCKED; } while(0) #define rwlock_is_locked(x) (*(x) != RW_LOCK_UNLOCKED) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-um/pgtable.h 901-mjb1.1/include/asm-um/pgtable.h --- 000-virgin/include/asm-um/pgtable.h Fri May 30 19:02:21 2003 +++ 901-mjb1.1/include/asm-um/pgtable.h Wed Aug 13 20:51:19 2003 @@ -40,6 +40,7 @@ extern unsigned long *empty_zero_page; #define PTRS_PER_PMD 1 #define PTRS_PER_PGD 1024 #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) #define FIRST_USER_PGD_NR 0 
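
/*
 * Illustration, not part of the patch: the one-line USER_PTRS_PER_PMD(x)
 * stubs added for ia64, m68k, parisc, ppc, ppc64, sh, sparc, sparc64, um
 * and x86_64 all expand to PTRS_PER_PMD; only the i386 version earlier in
 * this patch can return a smaller count, for the pgd slot that straddles
 * the user/kernel boundary.  A hypothetical caller would use the hook per
 * pgd slot, roughly like this (the function and its name are made up for
 * illustration):
 */
static unsigned long count_user_pmd_slots(void)
{
	unsigned long n = 0;
	int i;

	for (i = 0; i < USER_PTRS_PER_PGD; i++)
		n += USER_PTRS_PER_PMD(i);	/* PTRS_PER_PMD on most arches */

	return n;
}
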
#define pte_ERROR(e) \ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-x86_64/early_printk.h 901-mjb1.1/include/asm-x86_64/early_printk.h --- 000-virgin/include/asm-x86_64/early_printk.h Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/include/asm-x86_64/early_printk.h Wed Aug 13 21:05:43 2003 @@ -0,0 +1,8 @@ +#ifndef __X86_EARLY_PRINTK_H_X86_64_ +#define __X86_EARLY_PRINTK_H_X86_64_ + +#define VGABASE 0xffffffff800b8000UL +#define SERIAL_BASES { 0x3f8, 0x2f8 } +#define SERIAL_BASES_LEN 2 + +#endif diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/asm-x86_64/pgtable.h 901-mjb1.1/include/asm-x86_64/pgtable.h --- 000-virgin/include/asm-x86_64/pgtable.h Wed Jul 2 21:59:15 2003 +++ 901-mjb1.1/include/asm-x86_64/pgtable.h Wed Aug 13 20:51:19 2003 @@ -112,6 +112,7 @@ static inline void set_pml4(pml4_t *dst, #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define USER_PTRS_PER_PMD(x) (PTRS_PER_PMD) #define FIRST_USER_PGD_NR 0 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/compiler.h 901-mjb1.1/include/linux/compiler.h --- 000-virgin/include/linux/compiler.h Tue Aug 5 19:59:16 2003 +++ 901-mjb1.1/include/linux/compiler.h Wed Aug 13 20:47:23 2003 @@ -78,6 +78,6 @@ shouldn't recognize the original var, and make assumptions about it */ #define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ - __asm__ ("" : "=g"(__ptr) : "0"(ptr)); \ + __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ (typeof(ptr)) (__ptr + (off)); }) #endif /* __LINUX_COMPILER_H */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/early_printk.h 901-mjb1.1/include/linux/early_printk.h --- 000-virgin/include/linux/early_printk.h Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/include/linux/early_printk.h Wed Aug 13 21:05:43 2003 @@ -0,0 +1,47 @@ +#ifndef __X86_EARLY_PRINTK_H_ +#define __X86_EARLY_PRINTK_H_ + +#ifdef CONFIG_X86_EARLY_PRINTK +#include +#include +#include +#include +#include +#include + +/* Simple VGA output */ + +#define MAX_YPOS 25 +#define MAX_XPOS 80 + +/* Simple serial port output */ + +#define DEFAULT_BAUD 57600 +#define XMTRDY 0x20 + +#define DLAB 0x80 + +#define TXR 0 /* Transmit register (WRITE) */ +#define RXR 0 /* Receive register (READ) */ +#define IER 1 /* Interrupt Enable */ +#define IIR 2 /* Interrupt ID */ +#define FCR 2 /* FIFO control */ +#define LCR 3 /* Line control */ +#define MCR 4 /* Modem control */ +#define LSR 5 /* Line Status */ +#define MSR 6 /* Modem Status */ +#define DLL 0 /* Divisor Latch Low */ +#define DLH 1 /* Divisor latch High */ + + +void early_printk(const char *fmt, ...); +int __init setup_early_printk(); + +#else + +#define early_printk(...) 
do {} while(0) +#define setup_early_printk() do {} while(0) + +#endif + +#endif diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/fs.h 901-mjb1.1/include/linux/fs.h --- 000-virgin/include/linux/fs.h Wed Aug 13 20:24:32 2003 +++ 901-mjb1.1/include/linux/fs.h Wed Aug 13 20:51:50 2003 @@ -332,6 +332,9 @@ struct address_space { spinlock_t private_lock; /* for use by the address_space */ struct list_head private_list; /* ditto */ struct address_space *assoc_mapping; /* ditto */ +#ifdef CONFIG_NUMA + struct binding *binding; /* for memory bindings */ +#endif }; struct block_device { diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/gdb.h 901-mjb1.1/include/linux/gdb.h --- 000-virgin/include/linux/gdb.h Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/include/linux/gdb.h Wed Aug 13 20:29:29 2003 @@ -0,0 +1,67 @@ +#ifndef _GDB_H_ +#define _GDB_H_ + +/* + * Copyright (C) 2001 Amit S. Kale + */ + +/* gdb locks */ +#define KGDB_MAX_NO_CPUS NR_CPUS + +extern int gdb_enter; /* 1 = enter debugger on boot */ +extern int gdb_ttyS; +extern int gdb_baud; +extern int gdb_initialized; + +extern int gdb_hook(void); +extern void breakpoint(void); + +typedef int gdb_debug_hook(int trapno, + int signo, + int err_code, + struct pt_regs *regs); +extern gdb_debug_hook *linux_debug_hook; + +#ifdef CONFIG_SMP +extern spinlock_t kgdb_spinlock; +extern spinlock_t kgdb_nmispinlock; +#else +extern unsigned kgdb_spinlock; +extern unsigned kgdb_nmispinlock; +#endif + +extern volatile int kgdb_memerr_expected; + +struct console; +void gdb_console_write(struct console *co, const char *s, + unsigned count); +void gdb_console_init(void); + +extern volatile int procindebug[KGDB_MAX_NO_CPUS]; + +#define KGDB_ASSERT(message, condition) do { \ + if (!(condition)) { \ + printk("kgdb assertion failed: %s\n", message); \ + asm ("int $0x3"); \ + } \ +} while (0) + +#ifdef CONFIG_KERNEL_ASSERTS +#define KERNEL_ASSERT(message, condition) KGDB_ASSERT(message, condition) +#else +#define KERNEL_ASSERT(message, condition) +#endif + +#define KA_VALID_ERRNO(errno) ((errno) > 0 && (errno) <= EMEDIUMTYPE) + +#define KA_VALID_PTR_ERR(ptr) KA_VALID_ERRNO(-PTR_ERR(ptr)) + +#define KA_VALID_KPTR(ptr) (!(ptr) || \ + ((void *)(ptr) >= (void *)PAGE_OFFSET && \ + (void *)(ptr) < ERR_PTR(-EMEDIUMTYPE))) + +#define KA_VALID_PTRORERR(errptr) (KA_VALID_KPTR(errptr) || KA_VALID_PTR_ERR(errptr)) + +#define KA_HELD_GKL() (current->lock_depth >= 0) + +#endif /* _GDB_H_ */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/gfp.h 901-mjb1.1/include/linux/gfp.h --- 000-virgin/include/linux/gfp.h Tue Jun 24 21:29:25 2003 +++ 901-mjb1.1/include/linux/gfp.h Wed Aug 13 21:09:02 2003 @@ -32,6 +32,7 @@ #define __GFP_NOFAIL 0x800 /* Retry for ever. Cannot fail */ #define __GFP_NORETRY 0x1000 /* Do not retry. 
Might fail */ #define __GFP_NO_GROW 0x2000 /* Slab internal usage */ +#define __GFP_NODE_STRICT 0x4000 /* Do not fall back to other nodes */ #define GFP_ATOMIC (__GFP_HIGH) #define GFP_NOIO (__GFP_WAIT) @@ -66,7 +67,7 @@ static inline struct page * alloc_pages_ if (unlikely(order >= MAX_ORDER)) return NULL; - return __alloc_pages(gfp_mask, order, NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK)); + return __alloc_pages(gfp_mask, order, get_node_zonelist(nid, gfp_mask)); } #define alloc_pages(gfp_mask, order) \ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/kdev_t.h 901-mjb1.1/include/linux/kdev_t.h --- 000-virgin/include/linux/kdev_t.h Wed Aug 13 20:24:32 2003 +++ 901-mjb1.1/include/linux/kdev_t.h Wed Aug 13 20:48:46 2003 @@ -70,13 +70,13 @@ aeb - 950811 * static arrays, and they are sized for a 8-bit index. */ typedef struct { - unsigned short value; + unsigned int value; } kdev_t; -#define KDEV_MINOR_BITS 8 -#define KDEV_MAJOR_BITS 8 +#define KDEV_MINOR_BITS 16 +#define KDEV_MAJOR_BITS 16 -#define __mkdev(major,minor) (((major) << KDEV_MINOR_BITS) + (minor)) +#define __mkdev(major, minor) (((major) << KDEV_MINOR_BITS) + (minor)) #define mk_kdev(major, minor) ((kdev_t) { __mkdev(major,minor) } ) @@ -107,17 +107,55 @@ static inline int kdev_same(kdev_t dev1, #define kdev_none(d1) (!kdev_val(d1)) -/* Mask off the high bits for now.. */ -#define minor(dev) ((dev).value & 0xff) -#define major(dev) (((dev).value >> KDEV_MINOR_BITS) & 0xff) +#define minor(dev) ((dev).value & 0xffff) +#define major(dev) (((dev).value >> KDEV_MINOR_BITS) & 0xffff) /* These are for user-level "dev_t" */ +/* Since glibc uses 8+8 in , we'll get + incompatibilities with a simple scheme like 12+20. + Use 8+8 for 16-bit values, some other division, say 16+16, + for 32-bit values. */ #define MINORBITS 8 #define MINORMASK ((1U << MINORBITS) - 1) -#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) -#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) -#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) +#include /* dev_t */ +#if 1 +/* macro versions */ + +#define MAJOR(dev) ((unsigned int)(((dev) & 0xffff0000) ? ((dev) >> 16) & 0xffff : ((dev) >> 8) & 0xff)) +#define MINOR(dev) ((unsigned int)(((dev) & 0xffff0000) ? ((dev) & 0xffff) : ((dev) & 0xff))) +#define MKDEV(ma,mi) ((dev_t)((((ma) & ~0xff) == 0 && ((mi) & ~0xff) == 0) ? 
(((ma) << 8) | (mi)) : (((ma) << 16) | (mi)))) + +#else +/* inline function versions */ + +static inline unsigned int +MAJOR(dev_t dev) { + unsigned int ma; + + ma = ((dev >> 16) & 0xffff); + if (ma == 0) + ma = ((dev >> 8) & 0xff); + return ma; +} + +static inline unsigned int +MINOR(dev_t dev) { + unsigned int mi; + + mi = (dev & 0xffff); + if (mi == dev) + mi = (dev & 0xff); + return mi; +} + +static inline dev_t +MKDEV(unsigned int ma, unsigned int mi) { + if ((ma & ~0xff) == 0 && (mi & ~0xff) == 0) + return ((ma << 8) | mi); + return ((ma << 16) | mi); +} +#endif /* * Conversion functions @@ -125,12 +163,16 @@ static inline int kdev_same(kdev_t dev1, static inline int kdev_t_to_nr(kdev_t dev) { - return MKDEV(major(dev), minor(dev)); + unsigned int ma = major(dev); + unsigned int mi = minor(dev); + return MKDEV(ma, mi); } -static inline kdev_t to_kdev_t(int dev) +static inline kdev_t to_kdev_t(dev_t dev) { - return mk_kdev(MAJOR(dev),MINOR(dev)); + unsigned int ma = MAJOR(dev); + unsigned int mi = MINOR(dev); + return mk_kdev(ma, mi); } #define print_dev_t(buffer, dev) \ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/lockmeter.h 901-mjb1.1/include/linux/lockmeter.h --- 000-virgin/include/linux/lockmeter.h Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/include/linux/lockmeter.h Wed Aug 13 20:29:36 2003 @@ -0,0 +1,320 @@ +/* + * Copyright (C) 1999-2002 Silicon Graphics, Inc. + * + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.h by Jack Steiner (steiner@sgi.com) + * + * Modified by Ray Bryant (raybry@us.ibm.com) Feb-Apr 2000 + * Changes Copyright (C) 2000 IBM, Inc. + * Added save of index in spinlock_t to improve efficiency + * of "hold" time reporting for spinlocks + * Added support for hold time statistics for read and write + * locks. + * Moved machine dependent code to include/asm/lockmeter.h. + * + */ + +#ifndef _LINUX_LOCKMETER_H +#define _LINUX_LOCKMETER_H + + +/*--------------------------------------------------- + * architecture-independent lockmeter.h + *-------------------------------------------------*/ + +/* + * raybry -- version 2: added efficient hold time statistics + * requires lstat recompile, so flagged as new version + * raybry -- version 3: added global reader lock data + * hawkes -- version 4: removed some unnecessary fields to simplify mips64 port + */ +#define LSTAT_VERSION 5 + +int lstat_update(void*, void*, int); +int lstat_update_time(void*, void*, int, uint32_t); + +/* + * Currently, the mips64 and sparc64 kernels talk to a 32-bit lockstat, so we + * need to force compatibility in the inter-communication data structure. + */ + +#if defined(CONFIG_MIPS32_COMPAT) +#define TIME_T uint32_t +#elif defined(CONFIG_SPARC32_COMPAT) +#define TIME_T uint64_t +#else +#define TIME_T time_t +#endif + +#if defined(__KERNEL__) || (!defined(CONFIG_MIPS32_COMPAT) && !defined(CONFIG_SPARC32_COMPAT)) || (_MIPS_SZLONG==32) +#define POINTER void * +#else +#define POINTER int64_t +#endif + +/* + * Values for the "action" parameter passed to lstat_update. + * ZZZ - do we want a try-success status here??? + */ +#define LSTAT_ACT_NO_WAIT 0 +#define LSTAT_ACT_SPIN 1 +#define LSTAT_ACT_REJECT 2 +#define LSTAT_ACT_WW_SPIN 3 +#define LSTAT_ACT_SLEPT 4 /* UNUSED */ + +#define LSTAT_ACT_MAX_VALUES 4 /* NOTE: Increase to 5 if use ACT_SLEPT */ + +/* + * Special values for the low 2 bits of an RA passed to + * lstat_update. + */ +/* we use these values to figure out what kind of lock data */ +/* is stored in the statistics table entry at index ....... 
*/ +#define LSTAT_RA_SPIN 0 /* spin lock data */ +#define LSTAT_RA_READ 1 /* read lock statistics */ +#define LSTAT_RA_SEMA 2 /* RESERVED */ +#define LSTAT_RA_WRITE 3 /* write lock statistics*/ + +#define LSTAT_RA(n) \ + ((void*)( ((unsigned long)__builtin_return_address(0) & ~3) | n) ) + +/* + * Constants used for lock addresses in the lstat_directory + * to indicate special values of the lock address. + */ +#define LSTAT_MULTI_LOCK_ADDRESS NULL + +/* + * Maximum size of the lockstats tables. Increase this value + * if its not big enough. (Nothing bad happens if its not + * big enough although some locks will not be monitored.) + * We record overflows of this quantity in lstat_control.dir_overflows + * + * Note: The max value here must fit into the field set + * and obtained by the macro's PUT_INDEX() and GET_INDEX(). + * This value depends on how many bits are available in the + * lock word in the particular machine implementation we are on. + */ +#define LSTAT_MAX_STAT_INDEX 2000 + +/* + * Size and mask for the hash table into the directory. + */ +#define LSTAT_HASH_TABLE_SIZE 4096 /* must be 2**N */ +#define LSTAT_HASH_TABLE_MASK (LSTAT_HASH_TABLE_SIZE-1) + +#define DIRHASH(ra) ((unsigned long)(ra)>>2 & LSTAT_HASH_TABLE_MASK) + +/* + * This defines an entry in the lockstat directory. It contains + * information about a lock being monitored. + * A directory entry only contains the lock identification - + * counts on usage of the lock are kept elsewhere in a per-cpu + * data structure to minimize cache line pinging. + */ +typedef struct { + POINTER caller_ra; /* RA of code that set lock */ + POINTER lock_ptr; /* lock address */ + ushort next_stat_index; /* Used to link multiple locks that have the same hash table value */ +} lstat_directory_entry_t; + +/* + * A multi-dimensioned array used to contain counts for lock accesses. + * The array is 3-dimensional: + * - CPU number. Keep from thrashing cache lines between CPUs + * - Directory entry index. Identifies the lock + * - Action. Indicates what kind of contention occurred on an + * access to the lock. + * + * The index of an entry in the directory is the same as the 2nd index + * of the entry in the counts array. + */ +/* + * This table contains data for spin_locks, write locks, and read locks + * Not all data is used for all cases. In particular, the hold time + * information is not stored here for read locks since that is a global + * (e. g. cannot be separated out by return address) quantity. + * See the lstat_read_lock_counts_t structure for the global read lock + * hold time. + */ +typedef struct { + uint64_t cum_wait_ticks; /* sum of wait times */ + /* for write locks, sum of time a */ + /* writer is waiting for a reader */ + int64_t cum_hold_ticks; /* cumulative sum of holds */ + /* not used for read mode locks */ + /* must be signed. ............... 
*/ + uint32_t max_wait_ticks; /* max waiting time */ + uint32_t max_hold_ticks; /* max holding time */ + uint64_t cum_wait_ww_ticks; /* sum times writer waits on writer*/ + uint32_t max_wait_ww_ticks; /* max wait time writer vs writer */ + /* prev 2 only used for write locks*/ + uint32_t acquire_time; /* time lock acquired this CPU */ + uint32_t count[LSTAT_ACT_MAX_VALUES]; +} lstat_lock_counts_t; + +typedef lstat_lock_counts_t lstat_cpu_counts_t[LSTAT_MAX_STAT_INDEX]; + +/* + * User request to: + * - turn statistic collection on/off, or to reset + */ +#define LSTAT_OFF 0 +#define LSTAT_ON 1 +#define LSTAT_RESET 2 +#define LSTAT_RELEASE 3 + +#define LSTAT_MAX_READ_LOCK_INDEX 1000 +typedef struct { + POINTER lock_ptr; /* address of lock for output stats */ + uint32_t read_lock_count; + int64_t cum_hold_ticks; /* sum of read lock hold times over */ + /* all callers. ....................*/ + uint32_t write_index; /* last write lock hash table index */ + uint32_t busy_periods; /* count of busy periods ended this */ + uint64_t start_busy; /* time this busy period started. ..*/ + uint64_t busy_ticks; /* sum of busy periods this lock. ..*/ + uint64_t max_busy; /* longest busy period for this lock*/ + uint32_t max_readers; /* maximum number of readers ...... */ +#ifdef USER_MODE_TESTING + rwlock_t entry_lock; /* lock for this read lock entry... */ + /* avoid having more than one rdr at*/ + /* needed for user space testing... */ + /* not needed for kernel 'cause it */ + /* is non-preemptive. ............. */ +#endif +} lstat_read_lock_counts_t; +typedef lstat_read_lock_counts_t lstat_read_lock_cpu_counts_t[LSTAT_MAX_READ_LOCK_INDEX]; + +#if defined(__KERNEL__) || defined(USER_MODE_TESTING) + +#ifndef USER_MODE_TESTING +#include +#else +#include "asm_newlockmeter.h" +#endif + +/* + * Size and mask for the hash table into the directory. + */ +#define LSTAT_HASH_TABLE_SIZE 4096 /* must be 2**N */ +#define LSTAT_HASH_TABLE_MASK (LSTAT_HASH_TABLE_SIZE-1) + +#define DIRHASH(ra) ((unsigned long)(ra)>>2 & LSTAT_HASH_TABLE_MASK) + +/* + * This version eliminates the per processor lock stack. What we do is to + * store the index of the lock hash structure in unused bits in the lock + * itself. Then on unlock we can find the statistics record without doing + * any additional hash or lock stack lookup. This works for spin_locks. + * Hold time reporting is now basically as cheap as wait time reporting + * so we ignore the difference between LSTAT_ON_HOLD and LSTAT_ON_WAIT + * as in version 1.1.* of lockmeter. + * + * For rw_locks, we store the index of a global reader stats structure in + * the lock and the writer index is stored in the latter structure. + * For read mode locks we hash at the time of the lock to find an entry + * in the directory for reader wait time and the like. + * At unlock time for read mode locks, we update just the global structure + * so we don't need to know the reader directory index value at unlock time. + * + */ + +/* + * Protocol to change lstat_control.state + * This is complicated because we don't want the cum_hold_time for + * a rw_lock to be decremented in _read_lock_ without making sure it + * is incremented in _read_lock_ and vice versa. So here is the + * way we change the state of lstat_control.state: + * I. To Turn Statistics On + * After allocating storage, set lstat_control.state non-zero. + * This works because we don't start updating statistics for in use + * locks until the reader lock count goes to zero. + * II. 
To Turn Statistics Off: + * (0) Disable interrupts on this CPU + * (1) Seize the lstat_control.directory_lock + * (2) Obtain the current value of lstat_control.next_free_read_lock_index + * (3) Store a zero in lstat_control.state. + * (4) Release the lstat_control.directory_lock + * (5) For each lock in the read lock list up to the saved value + * (well, -1) of the next_free_read_lock_index, do the following: + * (a) Check validity of the stored lock address + * by making sure that the word at the saved addr + * has an index that matches this entry. If not + * valid, then skip this entry. + * (b) If there is a write lock already set on this lock, + * skip to (d) below. + * (c) Set a non-metered write lock on the lock + * (d) set the cached INDEX in the lock to zero + * (e) Release the non-metered write lock. + * (6) Re-enable interrupts + * + * These rules ensure that a read lock will not have its statistics + * partially updated even though the global lock recording state has + * changed. See put_lockmeter_info() for implementation. + * + * The reason for (b) is that there may be write locks set on the + * syscall path to put_lockmeter_info() from user space. If we do + * not do this check, then we can deadlock. A similar problem would + * occur if the lock was read locked by the current CPU. At the + * moment this does not appear to happen. + */ + +/* + * Main control structure for lockstat. Used to turn statistics on/off + * and to maintain directory info. + */ +typedef struct { + int state; + spinlock_t control_lock; /* used to serialize turning statistics on/off */ + spinlock_t directory_lock; /* for serialize adding entries to directory */ + volatile int next_free_dir_index;/* next free entry in the directory */ + /* FIXME not all of these fields are used / needed .............. */ + /* the following fields represent data since */ + /* first "lstat on" or most recent "lstat reset" */ + TIME_T first_started_time; /* time when measurement first enabled */ + TIME_T started_time; /* time when measurement last started */ + TIME_T ending_time; /* time when measurement last disabled */ + uint64_t started_cycles64; /* cycles when measurement last started */ + uint64_t ending_cycles64; /* cycles when measurement last disabled */ + uint64_t enabled_cycles64; /* total cycles with measurement enabled */ + int intervals; /* number of measurement intervals recorded */ + /* i. e. 
number of times did lstat on;lstat off */ + lstat_directory_entry_t *dir; /* directory */ + int dir_overflow; /* count of times ran out of space in directory */ + int rwlock_overflow; /* count of times we couldn't allocate a rw block*/ + ushort *hashtab; /* hash table for quick dir scans */ + lstat_cpu_counts_t *counts[NR_CPUS]; /* Array of pointers to per-cpu stats */ + int next_free_read_lock_index; /* next rwlock reader (global) stats block */ + lstat_read_lock_cpu_counts_t *read_lock_counts[NR_CPUS]; /* per cpu read lock stats */ +} lstat_control_t; + +#endif /* defined(__KERNEL__) || defined(USER_MODE_TESTING) */ + +typedef struct { + short lstat_version; /* version of the data */ + short state; /* the current state is returned */ + int maxcpus; /* Number of cpus present */ + int next_free_dir_index; /* index of the next free directory entry */ + TIME_T first_started_time; /* when measurement enabled for first time */ + TIME_T started_time; /* time in secs since 1969 when stats last turned on */ + TIME_T ending_time; /* time in secs since 1969 when stats last turned off */ + uint32_t cycleval; /* cycles per second */ +#ifdef notyet + void *kernel_magic_addr; /* address of kernel_magic */ + void *kernel_end_addr; /* contents of kernel magic (points to "end") */ +#endif + int next_free_read_lock_index; /* index of next (global) read lock stats struct */ + uint64_t started_cycles64; /* cycles when measurement last started */ + uint64_t ending_cycles64; /* cycles when stats last turned off */ + uint64_t enabled_cycles64; /* total cycles with measurement enabled */ + int intervals; /* number of measurement intervals recorded */ + /* i.e. number of times we did lstat on;lstat off*/ + int dir_overflow; /* number of times we wanted more space in directory */ + int rwlock_overflow; /* # of times we wanted more space in read_locks_count */ + struct new_utsname uts; /* info about machine where stats are measured */ + /* -T option of lockstat allows data to be */ + /* moved to another machine. ................. */ +} lstat_user_request_t; + +#endif /* _LINUX_LOCKMETER_H */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/mm.h 901-mjb1.1/include/linux/mm.h --- 000-virgin/include/linux/mm.h Wed Aug 13 20:24:32 2003 +++ 901-mjb1.1/include/linux/mm.h Wed Aug 13 20:51:52 2003 @@ -179,6 +179,7 @@ struct page { struct pte_chain *chain;/* Reverse pte mapping pointer. * protected by PG_chainlock */ pte_addr_t direct; + int mapcount; } pte; unsigned long private; /* mapping-private opaque data */ @@ -610,6 +611,39 @@ extern struct page * follow_page(struct int write); extern int remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); + +/* + * Given a struct page, determine which node's memory it is from. + * TODO: There's probably a more efficient way to do this... 
+ */ +static inline int page_to_nid(struct page *page) +{ + return pfn_to_nid(page_to_pfn(page)); +} + +#ifdef CONFIG_NUMA +static inline void zero_rss(struct mm_struct *mm) +{ + mm->rss = 0; + memset(mm->pernode_rss, 0, MAX_NUMNODES * sizeof(*mm->pernode_rss)); +} + +static inline void inc_rss(struct mm_struct *mm, struct page *page) +{ + mm->rss++; + mm->pernode_rss[page_to_nid(page)]++; +} + +static inline void dec_rss(struct mm_struct *mm, struct page *page) +{ + mm->rss--; + mm->pernode_rss[page_to_nid(page)]--; +} +#else /* !CONFIG_NUMA */ +#define zero_rss(mm) ((mm)->rss = 0) +#define inc_rss(mm, page) ((mm)->rss++) +#define dec_rss(mm, page) ((mm)->rss--) +#endif /* CONFIG_NUMA */ #ifndef CONFIG_DEBUG_PAGEALLOC static inline void diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/mmzone.h 901-mjb1.1/include/linux/mmzone.h --- 000-virgin/include/linux/mmzone.h Wed Aug 13 20:24:32 2003 +++ 901-mjb1.1/include/linux/mmzone.h Wed Aug 13 20:51:53 2003 @@ -300,6 +300,7 @@ extern struct pglist_data contig_page_da #define NODE_DATA(nid) (&contig_page_data) #define NODE_MEM_MAP(nid) mem_map #define MAX_NR_NODES 1 +#define pfn_to_nid(pfn) (0) #else /* CONFIG_DISCONTIGMEM */ #include @@ -362,6 +363,19 @@ static inline unsigned int num_online_me #define num_online_memblks() 1 #endif /* CONFIG_DISCONTIGMEM || CONFIG_NUMA */ + +static inline struct zonelist *get_node_zonelist(int nid, int gfp_mask) +{ + return NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK); +} + +#define get_zonelist(gfp_mask) get_node_zonelist(numa_node_id(), gfp_mask) + +/* Structure to keep track of memory segment (VMA) bindings */ +struct binding { + struct zonelist zonelist; +}; + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _LINUX_MMZONE_H */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/module.h 901-mjb1.1/include/linux/module.h --- 000-virgin/include/linux/module.h Tue Aug 5 20:01:55 2003 +++ 901-mjb1.1/include/linux/module.h Wed Aug 13 20:51:56 2003 @@ -257,6 +257,11 @@ struct module /* The command line arguments (may be mangled). 
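As a rough illustration (not part of the patch), the new per-node RSS helpers are meant to be paired with the page_table_lock that already guards mm->rss; the wrapper function below is hypothetical:

static void example_account_mapped_page(struct mm_struct *mm, struct page *page)
{
	/* page_table_lock protects mm->rss and, with this patch, pernode_rss too */
	spin_lock(&mm->page_table_lock);
	inc_rss(mm, page);	/* mm->rss++ plus mm->pernode_rss[page_to_nid(page)]++ on NUMA */
	spin_unlock(&mm->page_table_lock);
}

On non-NUMA builds the same call collapses to a plain (mm)->rss++, per the #else macros above.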
People like keeping pointers to this stuff */ char *args; + +#ifdef CONFIG_GCOV_PROFILE + const char *ctors_start; /* Pointer to start of .ctors-section */ + const char *ctors_end; /* Pointer to end of .ctors-section */ +#endif }; /* FIXME: It'd be nice to isolate modules during init, too, so they diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/nfsd/syscall.h 901-mjb1.1/include/linux/nfsd/syscall.h --- 000-virgin/include/linux/nfsd/syscall.h Sat May 10 18:35:03 2003 +++ 901-mjb1.1/include/linux/nfsd/syscall.h Wed Aug 13 20:48:46 2003 @@ -59,7 +59,7 @@ struct nfsctl_client { struct nfsctl_export { char ex_client[NFSCLNT_IDMAX+1]; char ex_path[NFS_MAXPATHLEN+1]; - __kernel_old_dev_t ex_dev; + u16 ex_dev; __kernel_ino_t ex_ino; int ex_flags; __kernel_uid_t ex_anon_uid; diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/page-flags.h 901-mjb1.1/include/linux/page-flags.h --- 000-virgin/include/linux/page-flags.h Tue Jun 24 21:29:26 2003 +++ 901-mjb1.1/include/linux/page-flags.h Wed Aug 13 20:29:24 2003 @@ -75,6 +75,7 @@ #define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ #define PG_reclaim 18 /* To be reclaimed asap */ #define PG_compound 19 /* Part of a compound page */ +#define PG_anon 20 /* Anonymous page */ /* @@ -266,6 +267,10 @@ extern void get_full_page_state(struct p #define PageCompound(page) test_bit(PG_compound, &(page)->flags) #define SetPageCompound(page) set_bit(PG_compound, &(page)->flags) #define ClearPageCompound(page) clear_bit(PG_compound, &(page)->flags) + +#define PageAnon(page) test_bit(PG_anon, &(page)->flags) +#define SetPageAnon(page) set_bit(PG_anon, &(page)->flags) +#define ClearPageAnon(page) clear_bit(PG_anon, &(page)->flags) /* * The PageSwapCache predicate doesn't use a PG_flag at this time, diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/pagemap.h 901-mjb1.1/include/linux/pagemap.h --- 000-virgin/include/linux/pagemap.h Fri May 30 19:02:23 2003 +++ 901-mjb1.1/include/linux/pagemap.h Wed Aug 13 20:51:50 2003 @@ -27,14 +27,37 @@ #define page_cache_release(page) put_page(page) void release_pages(struct page **pages, int nr, int cold); +#ifndef CONFIG_NUMA + +static inline struct page *__page_cache_alloc(struct address_space *x, int gfp_mask) +{ + return alloc_pages(gfp_mask, 0); +} + +#else /* CONFIG_NUMA */ + +static inline struct page *__page_cache_alloc(struct address_space *x, int gfp_mask) +{ + struct zonelist *zonelist; + + if (!x->binding) + zonelist = get_zonelist(gfp_mask); + else + zonelist = &x->binding->zonelist; + + return __alloc_pages(gfp_mask, 0, zonelist); +} + +#endif /* !CONFIG_NUMA */ + static inline struct page *page_cache_alloc(struct address_space *x) { - return alloc_pages(x->gfp_mask, 0); + return __page_cache_alloc(x, x->gfp_mask); } static inline struct page *page_cache_alloc_cold(struct address_space *x) { - return alloc_pages(x->gfp_mask|__GFP_COLD, 0); + return __page_cache_alloc(x, x->gfp_mask|__GFP_COLD); } typedef int filler_t(void *, struct page *); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/pci.h 901-mjb1.1/include/linux/pci.h --- 000-virgin/include/linux/pci.h Wed Aug 13 20:24:33 2003 +++ 901-mjb1.1/include/linux/pci.h Wed Aug 13 20:47:25 2003 @@ -459,10 +459,10 @@ struct pci_bus { void *sysdata; /* hook for sys-specific extension */ struct proc_dir_entry *procdir; /* directory entry in /proc/bus/pci */ - unsigned char number; /* bus number */ - unsigned char primary; /* number of primary bridge */ - unsigned char secondary; /* number of secondary 
bridge */ - unsigned char subordinate; /* max number of subordinate buses */ + unsigned int number; /* bus number */ + unsigned int primary; /* number of primary bridge */ + unsigned int secondary; /* number of secondary bridge */ + unsigned int subordinate; /* max number of subordinate buses */ char name[48]; diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/sched.h 901-mjb1.1/include/linux/sched.h --- 000-virgin/include/linux/sched.h Wed Aug 13 20:24:33 2003 +++ 901-mjb1.1/include/linux/sched.h Wed Aug 13 20:51:52 2003 @@ -70,7 +70,11 @@ struct exec_domain; * the EXP_n values would be 1981, 2034 and 2043 if still using only * 11 bit fractions. */ -extern unsigned long avenrun[]; /* Load averages */ +extern unsigned long avenrun[]; /* Load averages */ +extern unsigned long tasks_running[3]; /* Real load averages */ +DECLARE_PER_CPU(unsigned long[3],cpu_tasks_running); /* Real load averages per cpu */ + +extern unsigned long tasks_running[]; /* Real load averages */ #define FSHIFT 11 /* nr of bits of precision */ #define FIXED_1 (1< #include #include @@ -172,7 +180,13 @@ extern unsigned long cache_decay_ticks; #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long FASTCALL(schedule_timeout(signed long timeout)); -asmlinkage void schedule(void); +#ifdef CONFIG_KGDB_THREAD + asmlinkage void do_schedule(void); + asmlinkage void kern_schedule(void); + asmlinkage void kern_do_schedule(struct pt_regs); +#else + asmlinkage void schedule(void); +#endif struct namespace; @@ -191,7 +205,7 @@ struct mm_struct { atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ int map_count; /* number of VMAs */ struct rw_semaphore mmap_sem; - spinlock_t page_table_lock; /* Protects task page tables and mm->rss */ + spinlock_t page_table_lock; /* Protects task page tables and RSS data */ struct list_head mmlist; /* List of all active mm's. These are globally strung * together off init_mm.mmlist, and are protected @@ -201,7 +215,11 @@ struct mm_struct { unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; - unsigned long rss, total_vm, locked_vm; + unsigned long total_vm, locked_vm; + unsigned long rss; +#ifdef CONFIG_NUMA + unsigned long pernode_rss[MAX_NUMNODES]; +#endif unsigned long def_flags; unsigned long cpu_vm_mask; unsigned long swap_address; @@ -321,6 +339,13 @@ struct k_itimer { struct sigqueue *sigq; /* signal queue entry. 
*/ }; +struct sched_info { + /* running averages */ + unsigned long response_time, inter_arrival_time, service_time; + + /* timestamps */ + unsigned long last_arrival, began_service; +}; struct io_context; /* See blkdev.h */ void exit_io_context(void); @@ -345,6 +370,8 @@ struct task_struct { unsigned long cpus_allowed; unsigned int time_slice, first_time_slice; + struct sched_info sched_info; + struct list_head tasks; struct list_head ptrace_children; struct list_head ptrace_list; @@ -497,7 +524,7 @@ static inline int set_cpus_allowed(task_ } #endif -#ifdef CONFIG_NUMA +#ifdef CONFIG_NUMA_SCHED extern void sched_balance_exec(void); extern void node_nr_running_init(void); #else @@ -532,6 +559,7 @@ extern struct task_struct init_task; extern struct mm_struct init_mm; +extern int find_next_pid(int pid); extern struct task_struct *find_task_by_pid(int pid); extern void set_special_pids(pid_t session, pid_t pgrp); extern void __set_special_pids(pid_t session, pid_t pgrp); @@ -715,6 +743,12 @@ static inline int thread_group_empty(tas (thread_group_leader(p) && !thread_group_empty(p)) extern void unhash_process(struct task_struct *p); + +#ifdef CONFIG_KGDB_THREAD +#define schedule() kern_schedule() +#else +#define user_schedule() schedule() +#endif /* Protects ->fs, ->files, ->mm, and synchronises with wait4(). * Nests both inside and outside of read_lock(&tasklist_lock). diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/spinlock.h 901-mjb1.1/include/linux/spinlock.h --- 000-virgin/include/linux/spinlock.h Wed Jul 2 21:59:15 2003 +++ 901-mjb1.1/include/linux/spinlock.h Wed Aug 13 20:48:54 2003 @@ -184,6 +184,17 @@ typedef struct { #endif /* !SMP */ +#ifdef CONFIG_LOCKMETER +extern void _metered_spin_lock (spinlock_t *lock); +extern void _metered_spin_unlock (spinlock_t *lock); +extern int _metered_spin_trylock(spinlock_t *lock); +extern void _metered_read_lock (rwlock_t *lock); +extern void _metered_read_unlock (rwlock_t *lock); +extern void _metered_write_lock (rwlock_t *lock); +extern void _metered_write_unlock (rwlock_t *lock); +extern int _metered_write_trylock(rwlock_t *lock); +#endif + /* * Define the various spin_lock and rw_lock methods. Note we define these * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various @@ -388,6 +399,141 @@ do { \ #define spin_trylock_bh(lock) ({ local_bh_disable(); preempt_disable(); \ _raw_spin_trylock(lock) ? 1 : \ ({preempt_enable(); local_bh_enable(); 0;});}) + +#ifdef CONFIG_LOCKMETER +#undef spin_lock +#undef spin_trylock +#undef spin_unlock +#undef spin_lock_irqsave +#undef spin_lock_irq +#undef spin_lock_bh +#undef read_lock +#undef read_unlock +#undef write_lock +#undef write_unlock +#undef write_trylock +#undef spin_unlock_bh +#undef read_lock_irqsave +#undef read_lock_irq +#undef read_lock_bh +#undef read_unlock_bh +#undef write_lock_irqsave +#undef write_lock_irq +#undef write_lock_bh +#undef write_unlock_bh + +#define spin_lock(lock) \ +do { \ + preempt_disable(); \ + _metered_spin_lock(lock); \ +} while(0) + +#define spin_trylock(lock) ({preempt_disable(); _metered_spin_trylock(lock) ? 
\ + 1 : ({preempt_enable(); 0;});}) +#define spin_unlock(lock) \ +do { \ + _metered_spin_unlock(lock); \ + preempt_enable(); \ +} while (0) + +#define spin_lock_irqsave(lock, flags) \ +do { \ + local_irq_save(flags); \ + preempt_disable(); \ + _metered_spin_lock(lock); \ +} while (0) + +#define spin_lock_irq(lock) \ +do { \ + local_irq_disable(); \ + preempt_disable(); \ + _metered_spin_lock(lock); \ +} while (0) + +#define spin_lock_bh(lock) \ +do { \ + local_bh_disable(); \ + preempt_disable(); \ + _metered_spin_lock(lock); \ +} while (0) + +#define spin_unlock_bh(lock) \ +do { \ + _metered_spin_unlock(lock); \ + preempt_enable(); \ + local_bh_enable(); \ +} while (0) + + +#define read_lock(lock) ({preempt_disable(); _metered_read_lock(lock);}) +#define read_unlock(lock) ({_metered_read_unlock(lock); preempt_enable();}) +#define write_lock(lock) ({preempt_disable(); _metered_write_lock(lock);}) +#define write_unlock(lock) ({_metered_write_unlock(lock); preempt_enable();}) +#define write_trylock(lock) ({preempt_disable();_metered_write_trylock(lock) ? \ + 1 : ({preempt_enable(); 0;});}) +#define spin_unlock_no_resched(lock) \ +do { \ + _metered_spin_unlock(lock); \ + preempt_enable_no_resched(); \ +} while (0) + +#define read_lock_irqsave(lock, flags) \ +do { \ + local_irq_save(flags); \ + preempt_disable(); \ + _metered_read_lock(lock); \ +} while (0) + +#define read_lock_irq(lock) \ +do { \ + local_irq_disable(); \ + preempt_disable(); \ + _metered_read_lock(lock); \ +} while (0) + +#define read_lock_bh(lock) \ +do { \ + local_bh_disable(); \ + preempt_disable(); \ + _metered_read_lock(lock); \ +} while (0) + +#define read_unlock_bh(lock) \ +do { \ + _metered_read_unlock(lock); \ + preempt_enable(); \ + local_bh_enable(); \ +} while (0) + +#define write_lock_irqsave(lock, flags) \ +do { \ + local_irq_save(flags); \ + preempt_disable(); \ + _metered_write_lock(lock); \ +} while (0) + +#define write_lock_irq(lock) \ +do { \ + local_irq_disable(); \ + preempt_disable(); \ + _metered_write_lock(lock); \ +} while (0) + +#define write_lock_bh(lock) \ +do { \ + local_bh_disable(); \ + preempt_disable(); \ + _metered_write_lock(lock); \ +} while (0) + +#define write_unlock_bh(lock) \ +do { \ + _metered_write_unlock(lock); \ + preempt_enable(); \ + local_bh_enable(); \ +} while (0) + +#endif /* !CONFIG_LOCKMETER */ /* "lock on reference count zero" */ #ifndef ATOMIC_DEC_AND_LOCK diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/swap.h 901-mjb1.1/include/linux/swap.h --- 000-virgin/include/linux/swap.h Fri May 30 19:02:23 2003 +++ 901-mjb1.1/include/linux/swap.h Wed Aug 13 20:29:24 2003 @@ -186,6 +186,8 @@ struct pte_chain *FASTCALL(page_add_rmap void FASTCALL(page_remove_rmap(struct page *, pte_t *)); int FASTCALL(try_to_unmap(struct page *)); +int page_convert_anon(struct page *); + /* linux/mm/shmem.c */ extern int shmem_unuse(swp_entry_t entry, struct page *page); #else diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/sysctl.h 901-mjb1.1/include/linux/sysctl.h --- 000-virgin/include/linux/sysctl.h Tue Aug 5 20:01:43 2003 +++ 901-mjb1.1/include/linux/sysctl.h Wed Aug 13 20:27:51 2003 @@ -60,7 +60,8 @@ enum CTL_DEV=7, /* Devices */ CTL_BUS=8, /* Busses */ CTL_ABI=9, /* Binary emulation */ - CTL_CPU=10 /* CPU stuff (speed scaling, etc) */ + CTL_CPU=10, /* CPU stuff (speed scaling, etc) */ + CTL_SCHED=11, /* scheduler tunables */ }; /* CTL_BUS names: */ @@ -155,6 +156,21 @@ enum VM_MIN_FREE_KBYTES=21, /* Minimum free kilobytes to maintain */ }; +/* Tunable 
scheduler parameters in /proc/sys/sched/ */ +enum { + SCHED_MIN_TIMESLICE=1, /* minimum process timeslice */ + SCHED_MAX_TIMESLICE=2, /* maximum process timeslice */ + SCHED_CHILD_PENALTY=3, /* penalty on fork to child */ + SCHED_PARENT_PENALTY=4, /* penalty on fork to parent */ + SCHED_EXIT_WEIGHT=5, /* penalty to parent of CPU hog child */ + SCHED_PRIO_BONUS_RATIO=6, /* percent of max prio given as bonus */ + SCHED_INTERACTIVE_DELTA=7, /* delta used to scale interactivity */ + SCHED_MAX_SLEEP_AVG=8, /* maximum sleep avg attainable */ + SCHED_STARVATION_LIMIT=9, /* no re-active if expired is starved */ + SCHED_NODE_THRESHOLD=10, /* NUMA node rebalance threshold */ + SCHED_IDLE_NODE_REBALANCE_RATIO=11, /* how often to global balance */ + SCHED_BUSY_NODE_REBALANCE_RATIO=12, /* how often to global balance */ +}; /* CTL_NET names: */ enum diff -urpN -X /home/fletch/.diff.exclude 000-virgin/include/linux/timex.h 901-mjb1.1/include/linux/timex.h --- 000-virgin/include/linux/timex.h Tue Jun 24 21:29:26 2003 +++ 901-mjb1.1/include/linux/timex.h Wed Aug 13 20:27:41 2003 @@ -78,7 +78,7 @@ #elif HZ >= 768 && HZ < 1536 # define SHIFT_HZ 10 #else -# error You lose. +# error Please use a HZ value which is between 12 and 1536 #endif /* diff -urpN -X /home/fletch/.diff.exclude 000-virgin/init/main.c 901-mjb1.1/init/main.c --- 000-virgin/init/main.c Tue Aug 5 20:01:56 2003 +++ 901-mjb1.1/init/main.c Wed Aug 13 20:51:56 2003 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -55,6 +56,10 @@ #include #endif +#ifdef CONFIG_X86_REMOTE_DEBUG +#include +#endif + /* * Versions of gcc older than that listed below may actually compile * and link okay, but the end product can have subtle run time bugs. @@ -115,6 +120,10 @@ char *execute_command; /* Setup configured maximum number of CPUs to activate */ static unsigned int max_cpus = NR_CPUS; +#if defined(CONFIG_GCOV_PROFILE) && (defined(CONFIG_PPC32) || defined(CONFIG_PPC64)) +void __bb_fork_func (void) { } +#endif + /* * Setup routine for controlling SMP activation * @@ -377,6 +386,7 @@ static void rest_init(void) /* * Activate the first processor. */ +int kgdb_not_ready_yet; asmlinkage void __init start_kernel(void) { @@ -389,6 +399,8 @@ asmlinkage void __init start_kernel(void */ lock_kernel(); printk(linux_banner); + setup_early_printk(); + setup_arch(&command_line); setup_per_zone_pages_min(); setup_per_cpu_areas(); @@ -461,6 +473,14 @@ asmlinkage void __init start_kernel(void * make syscalls (and thus be locked). 
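A minimal user-space sketch of poking the new SCHED_* sysctls, assuming they surface as /proc/sys/sched/<name> with the lower-case names used in the documentation hunk (min_timeslice shown here; max_timeslice, child_penalty and the rest follow the same pattern):

#include <stdio.h>

int main(void)
{
	unsigned long val;
	FILE *f = fopen("/proc/sys/sched/min_timeslice", "r");

	if (!f || fscanf(f, "%lu", &val) != 1) {
		perror("min_timeslice");
		return 1;
	}
	fclose(f);
	printf("min_timeslice = %lu\n", val);

	/* needs root: write a new value back through the same sysctl file */
	f = fopen("/proc/sys/sched/min_timeslice", "w");
	if (f) {
		fprintf(f, "%lu\n", val + 1);
		fclose(f);
	}
	return 0;
}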
*/ init_idle(current, smp_processor_id()); + +#ifdef CONFIG_X86_REMOTE_DEBUG + if (gdb_enter) { + kgdb_not_ready_yet = 1; + gdb_hook(); /* right at boot time */ + kgdb_not_ready_yet = 0; + } +#endif /* Do the rest non-__init'ed, we're now alive */ rest_init(); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/kernel/Makefile 901-mjb1.1/kernel/Makefile --- 000-virgin/kernel/Makefile Wed Aug 13 20:24:33 2003 +++ 901-mjb1.1/kernel/Makefile Wed Aug 13 21:05:43 2003 @@ -8,9 +8,16 @@ obj-y = sched.o fork.o exec_domain.o signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o intermodule.o extable.o params.o posix-timers.o +ifdef CONFIG_GCOV_PROFILE +obj-y += gcov.o +export-objs += gcov.o +CFLAGS_gcov.o := -DGCOV_PATH='"$(TOPDIR)"' +endif + obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += cpu.o +obj-$(CONFIG_LOCKMETER) += lockmeter.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += ksyms.o module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o @@ -19,6 +26,7 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_IKCONFIG) += configs.o +obj-$(CONFIG_X86_EARLY_PRINTK) += early_printk.o # files to be removed upon make clean clean-files := ikconfig.h diff -urpN -X /home/fletch/.diff.exclude 000-virgin/kernel/early_printk.c 901-mjb1.1/kernel/early_printk.c --- 000-virgin/kernel/early_printk.c Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/kernel/early_printk.c Wed Aug 13 20:25:53 2003 @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/* Simple VGA output */ + +#define MAX_YPOS 25 +#define MAX_XPOS 80 + +static int current_ypos = 1, current_xpos = 0; + +static void early_vga_write(struct console *con, const char *str, unsigned n) +{ + char c; + int i, k, j; + + while ((c = *str++) != '\0' && n-- > 0) { + if (current_ypos >= MAX_YPOS) { + /* scroll 1 line up */ + for(k = 1, j = 0; k < MAX_YPOS; k++, j++) { + for(i = 0; i < MAX_XPOS; i++) { + writew(readw(VGABASE + 2*(MAX_XPOS*k + i)), + VGABASE + 2*(MAX_XPOS*j + i)); + } + } + for(i = 0; i < MAX_XPOS; i++) { + writew(0x720, VGABASE + 2*(MAX_XPOS*j + i)); + } + current_ypos = MAX_YPOS-1; + } + if (c == '\n') { + current_xpos = 0; + current_ypos++; + } else if (c != '\r') { + writew(((0x7 << 8) | (unsigned short) c), + VGABASE + 2*(MAX_XPOS*current_ypos + current_xpos++)); + if (current_xpos >= MAX_XPOS) { + current_xpos = 0; + current_ypos++; + } + } + } +} + +static struct console early_vga_console = { + .name = "earlyvga", + .write = early_vga_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +/* Serial functions losely based on a similar package from Klaus P. Gerlicher */ + +int early_serial_base; /* ttyS0 */ + +static int early_serial_putc(unsigned char ch) +{ + unsigned timeout = 0xffff; + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) + rep_nop(); + outb(ch, early_serial_base + TXR); + return timeout ? 
0 : -1; +} + +static void early_serial_write(struct console *con, const char *s, unsigned n) +{ + while (*s && n-- > 0) { + early_serial_putc(*s); + if (*s == '\n') + early_serial_putc('\r'); + s++; + } +} + +static __init void early_serial_init(char *opt) +{ + unsigned char c; + unsigned divisor, baud = DEFAULT_BAUD; + static int bases[] = SERIAL_BASES; + char *s, *e; + + early_serial_base = bases[0]; + + if (*opt == ',') + ++opt; + + s = strsep(&opt, ","); + if (s != NULL) { + unsigned port; + if (!strncmp(s,"0x",2)) + early_serial_base = simple_strtoul(s, &e, 16); + else { + if (!strncmp(s,"ttyS",4)) + s+=4; + port = simple_strtoul(s, &e, 10); + if (port > (SERIAL_BASES_LEN-1) || s == e) + port = 0; + early_serial_base = bases[port]; + } + } + + outb(0x3, early_serial_base + LCR); /* 8n1 */ + outb(0, early_serial_base + IER); /* no interrupt */ + outb(0, early_serial_base + FCR); /* no fifo */ + outb(0x3, early_serial_base + MCR); /* DTR + RTS */ + + s = strsep(&opt, ","); + if (s != NULL) { + baud = simple_strtoul(s, &e, 0); + if (baud == 0 || s == e) + baud = DEFAULT_BAUD; + } + + divisor = 115200 / baud; + c = inb(early_serial_base + LCR); + outb(c | DLAB, early_serial_base + LCR); + outb(divisor & 0xff, early_serial_base + DLL); + outb((divisor >> 8) & 0xff, early_serial_base + DLH); + outb(c & ~DLAB, early_serial_base + LCR); +} + +static struct console early_serial_console = { + .name = "earlyser", + .write = early_serial_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +/* Direct interface for emergencies */ +struct console *early_console = &early_vga_console; +static int early_console_initialized = 0; + +void early_printk(const char *fmt, ...) +{ + char buf[512]; + int n; + va_list ap; + va_start(ap,fmt); + n = vsnprintf(buf,512,fmt,ap); + early_console->write(early_console,buf,n); + va_end(ap); +} + +static int keep_early; + +int __init setup_early_printk(void) +{ + char *space, *s; + char buf[256]; + char cmd[COMMAND_LINE_SIZE]; + char *opt; + + /* Get our own copy of the cmd line */ + memcpy(cmd, COMMAND_LINE, COMMAND_LINE_SIZE); + cmd[COMMAND_LINE_SIZE-1] = '\0'; + opt = cmd; + + s = strstr(opt, "earlyprintk="); + if (s == NULL) + return -1; + opt = s+12; + + if (early_console_initialized) + return -1; + + strncpy(buf,opt,256); + buf[255] = 0; + space = strchr(buf, ' '); + if (space) + *space = 0; + + if (strstr(buf,"keep")) + keep_early = 1; + + if (!strncmp(buf, "serial", 6)) { + early_serial_init(buf + 6); + early_console = &early_serial_console; + } else if (!strncmp(buf, "ttyS", 4)) { + early_serial_init(buf); + early_console = &early_serial_console; + } else if (!strncmp(buf, "vga", 3)) { + early_console = &early_vga_console; + } else { + early_console = NULL; + return -1; + } + early_console_initialized = 1; + register_console(early_console); + printk("early printk console registered\n"); + return 0; +} + +void __init disable_early_printk(void) +{ + if (!early_console_initialized || !early_console) + return; + if (!keep_early) { + printk("disabling early console...\n"); + unregister_console(early_console); + early_console_initialized = 0; + } else { + printk("keeping early console.\n"); + } +} + +/* syntax: earlyprintk=vga + earlyprintk=serial[,ttySn[,baudrate]] + Append ,keep to not disable it when the real console takes over. + Only vga or serial at a time, not both. + Currently only ttyS0 and ttyS1 are supported. + Interaction with the standard serial driver is not very good. + The VGA output is eventually overwritten by the real console. 
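Putting the syntax above together, typical boot-loader command lines would look like:

	earlyprintk=vga
	earlyprintk=serial,ttyS0,115200,keep

The first writes directly to VGA text memory until the real console takes over; the second keeps the early serial console registered even after that (only ttyS0 and ttyS1 are handled, as noted above).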
*/ +__setup("earlyprintk=", setup_early_printk); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/kernel/fork.c 901-mjb1.1/kernel/fork.c --- 000-virgin/kernel/fork.c Tue Aug 5 20:01:56 2003 +++ 901-mjb1.1/kernel/fork.c Wed Aug 13 20:51:52 2003 @@ -180,7 +180,10 @@ void __init fork_init(unsigned long memp * value: the thread structures can take up at most half * of memory. */ - max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8; + if (THREAD_SIZE >= PAGE_SIZE) + max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8; + else + max_threads = (mempages * (PAGE_SIZE/THREAD_SIZE)) / 8; /* * we need to allow at least 20 threads to boot a system */ @@ -232,7 +235,7 @@ static inline int dup_mmap(struct mm_str mm->mmap_cache = NULL; mm->free_area_cache = TASK_UNMAPPED_BASE; mm->map_count = 0; - mm->rss = 0; + zero_rss(mm); mm->cpu_vm_mask = 0; pprev = &mm->mmap; diff -urpN -X /home/fletch/.diff.exclude 000-virgin/kernel/gcov.c 901-mjb1.1/kernel/gcov.c --- 000-virgin/kernel/gcov.c Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/kernel/gcov.c Wed Aug 13 20:51:56 2003 @@ -0,0 +1,158 @@ +/* + * Coverage support under Linux + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (c) International Business Machines Corp., 2002 + * + * Author: Hubertus Franke + * Rajan Ravindran + * + * Modified by + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct bb +{ + long zero_word; + const char *filename; + long *counts; + long ncounts; + struct bb *next; + const unsigned long *addresses; + + /* Older GCC's did not emit these fields. */ + long nwords; + const char **functions; + const long *line_nums; + const char **filenames; + char *flags; +}; + +struct bb *bb_head; +struct module *bb_context_address; +void (*gcov_callback)(int cmd, struct bb *bbptr) = NULL; + +#ifdef GCOV_PATH +char *gcov_kernelpath = GCOV_PATH; +#else +char *gcov_kernelpath = __FILE__; +#endif + + +void +__bb_init_func (struct bb *blocks) +{ + if (blocks->zero_word) + return; + + /* Set up linked list. */ + blocks->zero_word = 1; + + /* Store the address of the module of which this object-file is a part + of (set in do_global_ctors). */ + blocks->addresses = (unsigned long *) bb_context_address; + + blocks->next = bb_head; + bb_head = blocks; + + if (gcov_callback && bb_context_address) + (*gcov_callback)(1,blocks); +} + +/* Call constructors for all kernel objects and dynamic modules. This function + * is called both during module initialization and when the gcov kernel + * module is insmod'ed. The list of constructors is compiled into the + * kernel at &__CTOR_LIST__ to &__DTOR_LIST__ (labels are defined in + * head.S). In the case of a dynamic module the list is located at + * ctors_start to ctors_end. 
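The same constructor convention can be seen in a plain user-space program; this is only an analogy to illustrate the mechanism, not code from the patch:

#include <stdio.h>

static int coverage_registered;

/* gcc collects a pointer to every __attribute__((constructor)) function in a
 * constructor table; walking that table and calling through each entry is
 * what do_global_ctors() does for the kernel image and for modules. */
__attribute__((constructor))
static void register_coverage_object(void)
{
	/* in the kernel, each profiled object registers its struct bb at this
	 * point via __bb_init_func() */
	coverage_registered = 1;
}

int main(void)
{
	printf("constructor %s before main\n",
	       coverage_registered ? "ran" : "did not run");
	return 0;
}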
+ * + * The constructors in turn call __bb_init_func, reporting the respective + * struct bb for each object file. + */ + +void +do_global_ctors (char *ctors_start, char *ctors_end, struct module *addr, int mod_flag) +{ + extern char __CTOR_LIST__; + extern char __DTOR_LIST__; + typedef void (*func_ptr)(void) ; + func_ptr *constructor_ptr=NULL; + + if (!mod_flag) { + /* Set start and end ptr from global kernel constructor list. */ + ctors_start = &__CTOR_LIST__; + ctors_end = &__DTOR_LIST__; + bb_context_address = NULL; + } else { + /* Set context to current module address. */ + bb_context_address = addr; + } + + if (!ctors_start) + return; + + /* Call all constructor functions until either the end of the + list is reached or until a NULL is encountered. */ + for (constructor_ptr = (func_ptr *) ctors_start; + (constructor_ptr != (func_ptr *) ctors_end) && + (*constructor_ptr != NULL); + constructor_ptr++) { + (*constructor_ptr) (); + } +} + + +/* When a module is unloaded, this function is called to remove + * the respective bb entries from our list. context specifies + * the address of the module that is unloaded. */ + +void +remove_bb_link (struct module *context) +{ + struct bb *bbptr; + struct bb *prev = NULL; + + /* search for all the module's bbptrs */ + for (bbptr = bb_head; bbptr ; bbptr = bbptr->next) { + if (bbptr->addresses == (unsigned long *) context) { + if (gcov_callback) + (*gcov_callback)(0,bbptr); + if (prev == NULL) + bb_head = bbptr->next; + else + prev->next = bbptr->next; + } + else + prev = bbptr; + } +} + +EXPORT_SYMBOL(bb_head); +EXPORT_SYMBOL(__bb_init_func); +EXPORT_SYMBOL(do_global_ctors); +EXPORT_SYMBOL(gcov_kernelpath); +EXPORT_SYMBOL(gcov_callback); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/kernel/ksyms.c 901-mjb1.1/kernel/ksyms.c --- 000-virgin/kernel/ksyms.c Wed Aug 13 20:24:33 2003 +++ 901-mjb1.1/kernel/ksyms.c Wed Aug 13 20:29:41 2003 @@ -462,7 +462,12 @@ EXPORT_SYMBOL(sleep_on); EXPORT_SYMBOL(sleep_on_timeout); EXPORT_SYMBOL(interruptible_sleep_on); EXPORT_SYMBOL(interruptible_sleep_on_timeout); +#ifdef CONFIG_KGDB_THREAD +EXPORT_SYMBOL(kern_schedule); +EXPORT_SYMBOL(do_schedule); +#else EXPORT_SYMBOL(schedule); +#endif #ifdef CONFIG_PREEMPT EXPORT_SYMBOL(preempt_schedule); #endif @@ -610,6 +615,16 @@ EXPORT_SYMBOL(__per_cpu_offset); EXPORT_SYMBOL(set_fs_pwd); EXPORT_SYMBOL(set_fs_root); + +#if defined(CONFIG_LOCKMETER) +EXPORT_SYMBOL(_metered_spin_lock); +EXPORT_SYMBOL(_metered_spin_unlock); +EXPORT_SYMBOL(_metered_spin_trylock); +EXPORT_SYMBOL(_metered_read_lock); +EXPORT_SYMBOL(_metered_read_unlock); +EXPORT_SYMBOL(_metered_write_lock); +EXPORT_SYMBOL(_metered_write_unlock); +#endif /* debug */ EXPORT_SYMBOL(dump_stack); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/kernel/lockmeter.c 901-mjb1.1/kernel/lockmeter.c --- 000-virgin/kernel/lockmeter.c Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/kernel/lockmeter.c Wed Aug 13 20:29:36 2003 @@ -0,0 +1,1088 @@ +/* + * Copyright (C) 1999,2000 Silicon Graphics, Inc. + * + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.c by Jack Steiner (steiner@sgi.com) + * + * Modified by Ray Bryant (raybry@us.ibm.com) + * Changes Copyright (C) 2000 IBM, Inc. + * Added save of index in spinlock_t to improve efficiency + * of "hold" time reporting for spinlocks + * Added support for hold time statistics for read and write + * locks. 
+ */ + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#else +#define __SMP__ +#include +#include +#include +#include "bitops.h" +#include "user_scaffold.h" +#include +#include +#include "newlockmeter.h" +#endif + +#ifdef __KERNEL__ +#define ASSERT(cond) +#define bzero(loc,size) memset(loc,0,size) +#endif + +/*<---------------------------------------------------*/ +/* lockmeter.c */ +/*>---------------------------------------------------*/ + +#ifdef __KERNEL__ +static lstat_control_t lstat_control __cacheline_aligned = {LSTAT_OFF, SPIN_LOCK_UNLOCKED, SPIN_LOCK_UNLOCKED, 19*0, NR_CPUS*0, 0, NR_CPUS*0}; +#else +lstat_control_t lstat_control = {LSTAT_OFF, SPIN_LOCK_UNLOCKED, SPIN_LOCK_UNLOCKED, 19*0, NR_CPUS*0, 0, NR_CPUS*0}; +#endif + +int smp_num_cpus=NR_CPUS; + +#undef BUG +#define BUG() + +static ushort lstat_make_dir_entry(void *, void *); + +/* + * lstat_lookup + * + * Given a RA, locate the directory entry for the lock. + */ +static ushort +lstat_lookup( + void *lock_ptr, + void *caller_ra) +{ + ushort index; + lstat_directory_entry_t *dirp; + + dirp = lstat_control.dir; + + index = lstat_control.hashtab[DIRHASH(caller_ra)]; + while (dirp[index].caller_ra != caller_ra) { + if (index == 0) { + return(lstat_make_dir_entry(lock_ptr, caller_ra)); + } + index = dirp[index].next_stat_index; + } + + if (dirp[index].lock_ptr != NULL && + dirp[index].lock_ptr != lock_ptr) { + dirp[index].lock_ptr = NULL; + } + + return(index); +} + + +/* + * lstat_make_dir_entry + * Called to add a new lock to the lock directory. + */ +static ushort +lstat_make_dir_entry( + void *lock_ptr, + void *caller_ra) +{ + lstat_directory_entry_t *dirp; + ushort index, hindex; + unsigned long flags; + + /* lock the table without recursively reentering this metering code */ + do { local_irq_save(flags); + _raw_spin_lock(&lstat_control.directory_lock); } while(0); + + hindex = DIRHASH(caller_ra); + index = lstat_control.hashtab[hindex]; + dirp = lstat_control.dir; + while (index && dirp[index].caller_ra != caller_ra) + index = dirp[index].next_stat_index; + + if (index == 0) { + if(lstat_control.next_free_dir_index < LSTAT_MAX_STAT_INDEX) { + index = lstat_control.next_free_dir_index++; + lstat_control.dir[index].caller_ra = caller_ra; + lstat_control.dir[index].lock_ptr = lock_ptr; + lstat_control.dir[index].next_stat_index = lstat_control.hashtab[hindex]; + lstat_control.hashtab[hindex] = index; + } else { + lstat_control.dir_overflow++; + } + } + + do { _raw_spin_unlock(&lstat_control.directory_lock); + local_irq_restore(flags);} while(0); + return(index); +} + +int +lstat_update ( + void *lock_ptr, + void *caller_ra, + int action) +{ + int index; + int cpu; + + ASSERT(action < LSTAT_ACT_MAX_VALUES); + + if (lstat_control.state == LSTAT_OFF) { + return(0); + } + + index = lstat_lookup(lock_ptr, caller_ra); + cpu = THIS_CPU_NUMBER; + (*lstat_control.counts[cpu])[index].count[action]++; + (*lstat_control.counts[cpu])[index].acquire_time = get_cycles(); + + return(index); +} + +int +lstat_update_time ( + void *lock_ptr, + void *caller_ra, + int action, + uint32_t ticks) +{ + ushort index; + int cpu; + + ASSERT(action < LSTAT_ACT_MAX_VALUES); + + if (lstat_control.state == LSTAT_OFF) { + return(0); + } + + index = lstat_lookup(lock_ptr, caller_ra); + cpu = THIS_CPU_NUMBER; + (*lstat_control.counts[cpu])[index].count[action]++; + (*lstat_control.counts[cpu])[index].cum_wait_ticks += (uint64_t)ticks; + if 
((*lstat_control.counts[cpu])[index].max_wait_ticks < ticks) + (*lstat_control.counts[cpu])[index].max_wait_ticks = ticks; + + (*lstat_control.counts[cpu])[index].acquire_time = get_cycles(); + + return(index); +} + +void _metered_spin_lock(spinlock_t *lock_ptr) +{ + if (lstat_control.state == LSTAT_OFF) { + _raw_spin_lock(lock_ptr); /* do the real lock */ + PUT_INDEX(lock_ptr,0); /* clean index in case lockmetering */ + /* gets turned on before unlock */ + } else { + void *this_pc = LSTAT_RA(LSTAT_RA_SPIN); + int index; + + if (_raw_spin_trylock(lock_ptr)) { + index = lstat_update(lock_ptr, this_pc, LSTAT_ACT_NO_WAIT); + } else { + uint32_t start_cycles = get_cycles(); + _raw_spin_lock(lock_ptr); /* do the real lock */ + index = lstat_update_time(lock_ptr, this_pc, LSTAT_ACT_SPIN, + get_cycles() - start_cycles); + } + /* save the index in the lock itself for use in spin unlock */ + PUT_INDEX(lock_ptr,index); + } +} + +int _metered_spin_trylock(spinlock_t *lock_ptr) +{ + if (lstat_control.state == LSTAT_OFF) { + return _raw_spin_trylock(lock_ptr); + } else { + int retval; + void *this_pc = LSTAT_RA(LSTAT_RA_SPIN); + + if ((retval = _raw_spin_trylock(lock_ptr))) { + int index = lstat_update(lock_ptr, this_pc, LSTAT_ACT_NO_WAIT); + /* save the index in the lock itself for use in spin unlock */ + PUT_INDEX(lock_ptr,index); + } else { + lstat_update(lock_ptr, this_pc, LSTAT_ACT_REJECT); + } + + return retval; + } +} + +void _metered_spin_unlock(spinlock_t *lock_ptr) +{ + int index=-1; + + if (lstat_control.state != LSTAT_OFF) { + index = GET_INDEX(lock_ptr); + /* + * If statistics were turned off when we set the lock, + * then the index can be zero. If that is the case, + * then collect no stats on this call. + */ + if (index > 0) { + uint32_t hold_time; + int cpu = THIS_CPU_NUMBER; + hold_time = get_cycles() - (*lstat_control.counts[cpu])[index].acquire_time; + (*lstat_control.counts[cpu])[index].cum_hold_ticks += (uint64_t)hold_time; + if ((*lstat_control.counts[cpu])[index].max_hold_ticks < hold_time) + (*lstat_control.counts[cpu])[index].max_hold_ticks = hold_time; + } + } + + /* make sure we don't have a stale index value saved */ + PUT_INDEX(lock_ptr,0); + _raw_spin_unlock(lock_ptr); /* do the real unlock */ +} + +/* + * allocate the next global read lock structure and store its index + * in the rwlock at "lock_ptr". + */ +uint32_t alloc_rwlock_struct(rwlock_t *rwlock_ptr) +{ + int index; + int flags; + int cpu=THIS_CPU_NUMBER; + + /* If we've already overflowed, then do a quick exit */ + if (lstat_control.next_free_read_lock_index > LSTAT_MAX_READ_LOCK_INDEX) { + lstat_control.rwlock_overflow++; + return(0); + } + + do { local_irq_save(flags); + _raw_spin_lock(&lstat_control.directory_lock); } while(0); + + /* It is possible this changed while we were waiting for the directory_lock */ + if (lstat_control.state == LSTAT_OFF) { + index=0; + goto unlock; + } + + /* It is possible someone else got here first and set the index */ + if ((index=GET_RWINDEX(rwlock_ptr)) == 0) { + + /* we can't turn on read stats for this lock while there are readers */ + /* (this would mess up the running hold time sum at unlock time) */ + if (RWLOCK_READERS(rwlock_ptr) != 0) { + index=0; + goto unlock; + } + + /* if stats are turned on after being off, we may need to return an old */ + /* index from when the statistics were on last time. ................... 
*/ + for(index=1;index= LSTAT_MAX_READ_LOCK_INDEX) { + lstat_control.rwlock_overflow++; + index = 0; + goto unlock; + } + index = lstat_control.next_free_read_lock_index++; + + /* initialize the global read stats data structure for each cpu */ + for(cpu=0; cpu < smp_num_cpus; cpu++) { + (*lstat_control.read_lock_counts[cpu])[index].lock_ptr = rwlock_ptr; + } +put_index_and_unlock: + /* store the index for the read lock structure into the lock */ + PUT_RWINDEX(rwlock_ptr,index); + } + +unlock: + do { _raw_spin_unlock(&lstat_control.directory_lock); + local_irq_restore(flags);} while(0); + + return(index); +} + +void +_metered_read_lock(rwlock_t *rwlock_ptr) +{ + void *this_pc; + uint32_t start_cycles; + int index; + int cpu; + int flags; + int readers_before, readers_after; + uint64_t cycles64; + + if (lstat_control.state == LSTAT_OFF) { + _raw_read_lock(rwlock_ptr); + /* clean index in case lockmetering turns on before an unlock */ + PUT_RWINDEX(rwlock_ptr, 0); + return; + } + + this_pc = LSTAT_RA(LSTAT_RA_READ); + cpu = THIS_CPU_NUMBER; + index = GET_RWINDEX(rwlock_ptr); + + /* allocate the global stats entry for this lock, if needed */ + if (index==0) { + index = alloc_rwlock_struct(rwlock_ptr); + } + + readers_before = RWLOCK_READERS(rwlock_ptr); + if (_raw_read_trylock(rwlock_ptr)) { + /* + * We have decremented the lock to count a new reader, + * and have confirmed that no writer has it locked. + */ + /* update statistics if enabled */ + if (index>0) { +#ifndef __KERNEL__ + _raw_spin_lock((spinlock_t *)&(*lstat_control.read_lock_counts[cpu])[index].entry_lock); +#else + do { local_irq_save(flags); } while(0); +#endif + lstat_update((void *)rwlock_ptr, this_pc, LSTAT_ACT_NO_WAIT); + /* preserve value of TSC so cum_hold_ticks and start_busy use same value */ + cycles64 = get_cycles64(); + (*lstat_control.read_lock_counts[cpu])[index].cum_hold_ticks -= cycles64; + + /* record time and cpu of start of busy period */ + /* this is not perfect (some race conditions are possible) */ + if (readers_before==0) { + (*lstat_control.read_lock_counts[cpu])[index].start_busy = cycles64; + PUT_RW_CPU(rwlock_ptr, cpu); + } + readers_after=RWLOCK_READERS(rwlock_ptr); + if (readers_after > (*lstat_control.read_lock_counts[cpu])[index].max_readers) + (*lstat_control.read_lock_counts[cpu])[index].max_readers = readers_after; +#ifndef __KERNEL__ + _raw_spin_unlock((spinlock_t*)&(*lstat_control.read_lock_counts[cpu])[index].entry_lock); +#else + do {local_irq_restore(flags);} while(0); +#endif + } + + return; + } + /* If we get here, then we could not quickly grab the read lock */ + + start_cycles = get_cycles(); /* start counting the wait time */ + + /* Now spin until read_lock is successful */ + _raw_read_lock(rwlock_ptr); + + lstat_update_time((void *)rwlock_ptr, this_pc, LSTAT_ACT_SPIN, + get_cycles() - start_cycles); + + /* update statistics if they are enabled for this lock */ + if (index>0) { +#ifndef __KERNEL__ + _raw_spin_lock((spinlock_t *)&(*lstat_control.read_lock_counts[cpu])[index].entry_lock); +#else + do { local_irq_save(flags); } while(0); +#endif + cycles64 = get_cycles64(); + (*lstat_control.read_lock_counts[cpu])[index].cum_hold_ticks -= cycles64; + + /* this is not perfect (some race conditions are possible) */ + if (readers_before==0) { + (*lstat_control.read_lock_counts[cpu])[index].start_busy = cycles64; + PUT_RW_CPU(rwlock_ptr, cpu); + } + readers_after=RWLOCK_READERS(rwlock_ptr); + if (readers_after > (*lstat_control.read_lock_counts[cpu])[index].max_readers) + 
(*lstat_control.read_lock_counts[cpu])[index].max_readers = readers_after; + +#ifndef __KERNEL__ + _raw_spin_unlock((spinlock_t *)&(*lstat_control.read_lock_counts[cpu])[index].entry_lock); +#else + do {local_irq_restore(flags);} while(0); +#endif + } +} + +void _metered_read_unlock(rwlock_t *rwlock_ptr) +{ + int index; + int cpu; + int flags; + uint64_t busy_length; + uint64_t cycles64; + + if (lstat_control.state == LSTAT_OFF) { + _raw_read_unlock(rwlock_ptr); + return; + } + + index = GET_RWINDEX(rwlock_ptr); + cpu = THIS_CPU_NUMBER; + + if (index>0) { +#ifndef __KERNEL__ + _raw_spin_lock((spinlock_t *)&(*lstat_control.read_lock_counts[cpu])[index].entry_lock); +#else + /* updates below are non-atomic */ + do { local_irq_save(flags); } while(0); +#endif + /* preserve value of TSC so cum_hold_ticks and busy_ticks are consistent.. */ + cycles64 = get_cycles64(); + (*lstat_control.read_lock_counts[cpu])[index].cum_hold_ticks += cycles64; + (*lstat_control.read_lock_counts[cpu])[index].read_lock_count++; + + /* once again, this is not perfect (some race conditions are possible) */ + if (RWLOCK_READERS(rwlock_ptr) == 1) { + int cpu1 = GET_RW_CPU(rwlock_ptr); + uint64_t last_start_busy = (*lstat_control.read_lock_counts[cpu1])[index].start_busy; + (*lstat_control.read_lock_counts[cpu])[index].busy_periods++; + if (cycles64 > last_start_busy) { + busy_length = cycles64 - last_start_busy; + (*lstat_control.read_lock_counts[cpu])[index].busy_ticks += busy_length; + if (busy_length > (*lstat_control.read_lock_counts[cpu])[index].max_busy) + (*lstat_control.read_lock_counts[cpu])[index].max_busy = busy_length; + } + } +#ifndef __KERNEL__ + _raw_spin_unlock((spinlock_t *)&(*lstat_control.read_lock_counts[cpu])[index].entry_lock); +#else + do {local_irq_restore(flags);} while(0); +#endif + } + + /* unlock the lock */ + _raw_read_unlock(rwlock_ptr); +} + +void _metered_write_lock(rwlock_t *rwlock_ptr) +{ + uint32_t start_cycles; + void *this_pc; + uint32_t spin_ticks = 0; /* in anticipation of a potential wait */ + int index; + int write_index = 0; + int cpu; + enum {writer_writer_conflict, writer_reader_conflict} why_wait = writer_writer_conflict; + + if (lstat_control.state == LSTAT_OFF) { + _raw_write_lock(rwlock_ptr); + /* clean index in case lockmetering turns on before an unlock */ + PUT_RWINDEX(rwlock_ptr, 0); + return; + } + + this_pc = LSTAT_RA(LSTAT_RA_WRITE); + cpu = THIS_CPU_NUMBER; + index = GET_RWINDEX(rwlock_ptr); + + /* allocate the global stats entry for this lock, if needed */ + if (index == 0) { + index = alloc_rwlock_struct(rwlock_ptr); + } + + if (_raw_write_trylock(rwlock_ptr)) { + /* We acquired the lock on the first try */ + write_index = lstat_update((void *)rwlock_ptr, this_pc, LSTAT_ACT_NO_WAIT); + /* save the write_index for use in unlock if stats enabled */ + if (index > 0) + (*lstat_control.read_lock_counts[cpu])[index].write_index = write_index; + return; + } + + /* If we get here, then we could not quickly grab the write lock */ + start_cycles = get_cycles(); /* start counting the wait time */ + + why_wait = RWLOCK_READERS(rwlock_ptr) ? 
writer_reader_conflict : writer_writer_conflict; + + /* Now set the lock and wait for conflicts to disappear */ + _raw_write_lock(rwlock_ptr); + + spin_ticks = get_cycles() - start_cycles; + + /* update stats -- if enabled */ + if (index > 0) + if (spin_ticks) { + if (why_wait == writer_reader_conflict) { + /* waited due to a reader holding the lock */ + write_index = lstat_update_time((void *)rwlock_ptr, this_pc, + LSTAT_ACT_SPIN, spin_ticks); + } else { + /* waited due to another writer holding the lock */ + write_index = lstat_update_time((void *)rwlock_ptr, this_pc, + LSTAT_ACT_WW_SPIN, spin_ticks); + (*lstat_control.counts[cpu])[write_index].cum_wait_ww_ticks += spin_ticks; + if (spin_ticks > + (*lstat_control.counts[cpu])[write_index].max_wait_ww_ticks) { + (*lstat_control.counts[cpu])[write_index].max_wait_ww_ticks = spin_ticks; + } + } + + /* save the directory index for use on write_unlock */ + (*lstat_control.read_lock_counts[cpu])[index].write_index = write_index; + } + +} + +void +_metered_write_unlock(rwlock_t *rwlock_ptr) +{ + int index; + int cpu; + int write_index; + uint32_t hold_time; + + if (lstat_control.state == LSTAT_OFF) { + _raw_write_unlock(rwlock_ptr); + return; + } + + cpu = THIS_CPU_NUMBER; + index = GET_RWINDEX(rwlock_ptr); + + /* update statistics if stats enabled for this lock */ + if (index>0) { + write_index = (*lstat_control.read_lock_counts[cpu])[index].write_index; + + hold_time = get_cycles() - (*lstat_control.counts[cpu])[write_index].acquire_time; + (*lstat_control.counts[cpu])[write_index].cum_hold_ticks += (uint64_t)hold_time; + if ((*lstat_control.counts[cpu])[write_index].max_hold_ticks < hold_time) + (*lstat_control.counts[cpu])[write_index].max_hold_ticks = hold_time; + } + _raw_write_unlock(rwlock_ptr); +} + +int _metered_write_trylock(rwlock_t *rwlock_ptr) +{ + int retval; + void *this_pc = LSTAT_RA(LSTAT_RA_WRITE); + + if ((retval = _raw_write_trylock(rwlock_ptr))) { + lstat_update(rwlock_ptr, this_pc, LSTAT_ACT_NO_WAIT); + } else { + lstat_update(rwlock_ptr, this_pc, LSTAT_ACT_REJECT); + } + + return retval; +} + +#ifdef __KERNEL__ +static void +init_control_space(void) +{ + /* Set all control space pointers to null and indices to "empty" */ + int cpu; + + /* + * Access CPU_CYCLE_FREQUENCY at the outset, which in some + * architectures may trigger a runtime calculation that uses a + * spinlock. Let's do this before lockmetering is turned on. + */ + if (CPU_CYCLE_FREQUENCY == 0) + BUG(); + + lstat_control.hashtab = NULL; + lstat_control.dir = NULL; + for (cpu=0; cpu max_len) + return actual_ret_bcount; + + copy_to_user(buffer, (void *)&req, next_ret_bcount); + actual_ret_bcount += next_ret_bcount; + return actual_ret_bcount; + } else { + /* measurement is off but valid data present */ + /* fetch time info from lstat_control */ + req.ending_time = lstat_control.ending_time; + req.ending_cycles64 = lstat_control.ending_cycles64; + req.enabled_cycles64 = lstat_control.enabled_cycles64; + } + } else { + /* this must be a read while data active--use current time, etc */ + do_gettimeofday(&tv); + req.ending_time = tv.tv_sec; + req.ending_cycles64 = get_cycles64(); + req.enabled_cycles64 = req.ending_cycles64-req.started_cycles64 + + lstat_control.enabled_cycles64; + } + + next_ret_bcount = sizeof(lstat_user_request_t); + if ((actual_ret_bcount + next_ret_bcount) > max_len) + return actual_ret_bcount; + + copy_to_user(buffer, (void *)&req, next_ret_bcount); + actual_ret_bcount += next_ret_bcount; + + if (!lstat_control.counts[0]) /* not initialized? 
*/ + return actual_ret_bcount; + + next_ret_bcount = sizeof(lstat_cpu_counts_t); + for (cpu = 0; cpu < smp_num_cpus; cpu++) { + if ((actual_ret_bcount + next_ret_bcount) > max_len) + return actual_ret_bcount; /* leave early */ + copy_to_user(buffer + actual_ret_bcount, lstat_control.counts[cpu], + next_ret_bcount); + actual_ret_bcount += next_ret_bcount; + } + + next_ret_bcount = LSTAT_MAX_STAT_INDEX * sizeof(lstat_directory_entry_t); + if ( ((actual_ret_bcount + next_ret_bcount) > max_len) + || !lstat_control.dir ) + return actual_ret_bcount; /* leave early */ + + copy_to_user(buffer + actual_ret_bcount, lstat_control.dir, + next_ret_bcount); + actual_ret_bcount += next_ret_bcount; + + next_ret_bcount = sizeof(lstat_read_lock_cpu_counts_t); + for (cpu = 0; cpu < smp_num_cpus; cpu++) { + if (actual_ret_bcount + next_ret_bcount > max_len) + return actual_ret_bcount; + copy_to_user(buffer + actual_ret_bcount, lstat_control.read_lock_counts[cpu], + next_ret_bcount); + actual_ret_bcount += next_ret_bcount; + } + + return actual_ret_bcount; +} + +/* + * Writing to the /proc lockmeter node enables or disables metering. + * based upon the first byte of the "written" data. + * The following values are defined: + * LSTAT_ON: 1st call: allocates storage, intializes and turns on measurement + * subsequent calls just turn on measurement + * LSTAT_OFF: turns off measurement + * LSTAT_RESET: resets statistics + * LSTAT_RELEASE: releases statistics storage + * + * This allows one to accumulate statistics over several lockstat runs: + * + * lockstat on + * lockstat off + * ...repeat above as desired... + * lockstat get + * ...now start a new set of measurements... + * lockstat reset + * lockstat on + * ... + * + */ +ssize_t put_lockmeter_info(const char *buffer, size_t len) +{ + int error = 0; + int dirsize, countsize, read_lock_countsize, hashsize; + int cpu; + char put_char; + int i, read_lock_blocks, flags; + rwlock_t *lock_ptr; + struct timeval tv; + + if (len <= 0) + return -EINVAL; + + _raw_spin_lock(&lstat_control.control_lock); + + get_user(put_char, buffer); + switch (put_char) { + + case LSTAT_OFF: + if (lstat_control.state != LSTAT_OFF) { + /* + * To avoid seeing read lock hold times in an inconsisent state, + * we have to follow this protocol to turn off statistics + */ + do { local_irq_save(flags); } while(0); + /* getting this lock will stop any read lock block allocations */ + _raw_spin_lock(&lstat_control.directory_lock); + /* keep any more read lock blocks from being allocated */ + lstat_control.state = LSTAT_OFF; + /* record how may read lock blocks there are */ + read_lock_blocks = lstat_control.next_free_read_lock_index; + _raw_spin_unlock(&lstat_control.directory_lock); + /* now go through the list of read locks */ + cpu = THIS_CPU_NUMBER; + for(i=1;ictors_start && mod->ctors_end) + remove_bb_link(mod); +#endif + /* Module unload stuff */ module_unload_free(mod); @@ -1573,6 +1583,13 @@ static struct module *load_module(void _ /* Module has been moved. */ mod = (void *)sechdrs[modindex].sh_addr; +#ifdef CONFIG_GCOV_PROFILE + modindex = find_sec(hdr, sechdrs, secstrings, ".ctors"); + mod->ctors_start = (char *)sechdrs[modindex].sh_addr; + mod->ctors_end = (char *)(mod->ctors_start + + sechdrs[modindex].sh_size); +#endif + /* Now we've moved module, initialize linked lists, etc. 
*/ module_unload_init(mod); @@ -1722,6 +1739,12 @@ sys_init_module(void __user *umod, /* Start the module */ ret = mod->init(); + +#ifdef CONFIG_GCOV_PROFILE + if (mod->ctors_start && mod->ctors_end) { + do_global_ctors(mod->ctors_start, mod->ctors_end, mod, 1); + } +#endif if (ret < 0) { /* Init routine failed: abort. Try to protect us from buggy refcounters. */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/kernel/pid.c 901-mjb1.1/kernel/pid.c --- 000-virgin/kernel/pid.c Fri May 30 19:02:24 2003 +++ 901-mjb1.1/kernel/pid.c Wed Aug 13 20:51:45 2003 @@ -172,13 +172,22 @@ int attach_pid(task_t *task, enum pid_ty if (pid) atomic_inc(&pid->count); else { + struct list_head *elem, *bucket; + pid = &task->pids[type].pid; pid->nr = nr; atomic_set(&pid->count, 1); INIT_LIST_HEAD(&pid->task_list); pid->task = task; get_task_struct(task); - list_add(&pid->hash_chain, &pid_hash[type][pid_hashfn(nr)]); + bucket = &pid_hash[type][pid_hashfn(nr)]; + __list_for_each(elem, bucket) { + struct pid *walk; + walk = list_entry(elem, struct pid, hash_chain); + if (walk->nr > nr) + break; + } + list_add_tail(&pid->hash_chain, elem); } list_add_tail(&task->pids[type].pid_chain, &pid->task_list); task->pids[type].pidptr = pid; @@ -219,6 +228,42 @@ void detach_pid(task_t *task, enum pid_t if (find_pid(type, nr)) return; free_pidmap(nr); +} + +/** + * find_next_pid - Returns the pid of next task. + * @pid: Starting point for the search. + * + * Returns the pid number of the task that follows behind + * "pid". The function works even if the input pid value + * is not valid anymore. + */ + int find_next_pid(int pid) +{ + struct list_head *elem, *bucket; + + if(!pid) { + bucket = &pid_hash[PIDTYPE_PID][0]; + } else { + bucket = &pid_hash[PIDTYPE_PID][pid_hashfn(pid)]; + } + read_lock(&tasklist_lock); +next_chain: + __list_for_each(elem, bucket) { + struct pid *walk; + walk = list_entry(elem, struct pid, hash_chain); + if (walk->nr > pid) { + pid = walk->nr; + read_unlock(&tasklist_lock); + return pid; + } + } + pid = 0; + bucket++; + if (bucket < &pid_hash[PIDTYPE_PID][1< #include -#ifdef CONFIG_NUMA +#ifdef CONFIG_NUMA_SCHED #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu)) #else #define cpu_to_node_mask(cpu) (cpu_online_map) @@ -59,6 +59,11 @@ #define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) #define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) +/* the FIXED_1 gunk is so running averages don't vanish prematurely */ +#define RAVG_WEIGHT 128 +#define RAVG_FACTOR (RAVG_WEIGHT*FIXED_1) +#define RUNNING_AVG(x,y) (((RAVG_WEIGHT-1)*(x)+RAVG_FACTOR*(y))/RAVG_WEIGHT) + /* * These are the 'tuning knobs' of the scheduler: * @@ -66,16 +71,29 @@ * maximum timeslice is 200 msecs. Timeslices get refilled after * they expire. 
*/ -#define MIN_TIMESLICE ( 10 * HZ / 1000) -#define MAX_TIMESLICE (200 * HZ / 1000) -#define CHILD_PENALTY 50 -#define PARENT_PENALTY 100 -#define EXIT_WEIGHT 3 -#define PRIO_BONUS_RATIO 25 -#define INTERACTIVE_DELTA 2 -#define MAX_SLEEP_AVG (10*HZ) -#define STARVATION_LIMIT (10*HZ) -#define NODE_THRESHOLD 125 +int min_timeslice = (10 * HZ) / 1000; +int max_timeslice = (200 * HZ) / 1000; +int child_penalty = 50; +int parent_penalty = 100; +int exit_weight = 3; +int prio_bonus_ratio = 25; +int interactive_delta = 2; +int max_sleep_avg = 10 * HZ; +int starvation_limit = 10 * HZ; +int node_threshold = 125; + +#define MIN_TIMESLICE (min_timeslice) +#define MAX_TIMESLICE (max_timeslice) +#define CHILD_PENALTY (child_penalty) +#define PARENT_PENALTY (parent_penalty) +#define EXIT_WEIGHT (exit_weight) +#define PRIO_BONUS_RATIO (prio_bonus_ratio) +#define INTERACTIVE_DELTA (interactive_delta) +#define MAX_SLEEP_AVG (max_sleep_avg) +#define STARVATION_LIMIT (starvation_limit) +#define NODE_THRESHOLD (node_threshold) + +#define TIMESLICE_GRANULARITY (HZ/20 ?: 1) /* * If a task is 'interactive' then we reinsert it in the active @@ -163,7 +181,7 @@ struct runqueue { struct mm_struct *prev_mm; prio_array_t *active, *expired, arrays[2]; int prev_cpu_load[NR_CPUS]; -#ifdef CONFIG_NUMA +#ifdef CONFIG_NUMA_SCHED atomic_t *node_nr_running; int prev_node_load[MAX_NUMNODES]; #endif @@ -171,6 +189,8 @@ struct runqueue { struct list_head migration_queue; atomic_t nr_iowait; + + struct sched_info info; }; static DEFINE_PER_CPU(struct runqueue, runqueues); @@ -190,7 +210,7 @@ static DEFINE_PER_CPU(struct runqueue, r # define task_running(rq, p) ((rq)->curr == (p)) #endif -#ifdef CONFIG_NUMA +#ifdef CONFIG_NUMA_SCHED /* * Keep track of running tasks. @@ -227,13 +247,13 @@ __init void node_nr_running_init(void) } } -#else /* !CONFIG_NUMA */ +#else /* !CONFIG_NUMA_SCHED */ # define nr_running_init(rq) do { } while (0) # define nr_running_inc(rq) do { (rq)->nr_running++; } while (0) # define nr_running_dec(rq) do { (rq)->nr_running--; } while (0) -#endif /* CONFIG_NUMA */ +#endif /* CONFIG_NUMA_SCHED */ /* * task_rq_lock - lock the runqueue a given task resides on and disable @@ -260,6 +280,74 @@ static inline void task_rq_unlock(runque spin_unlock_irqrestore(&rq->lock, *flags); } +static inline void sched_info_arrive(task_t *t) +{ + unsigned long now = jiffies; + unsigned long diff = now - t->sched_info.last_arrival; + struct runqueue *rq = task_rq(t); + + t->sched_info.inter_arrival_time = + RUNNING_AVG(t->sched_info.inter_arrival_time, diff); + t->sched_info.last_arrival = now; + + if (!rq) + return; + diff = now - rq->info.last_arrival; + rq->info.inter_arrival_time = + RUNNING_AVG(rq->info.inter_arrival_time, diff); + rq->info.last_arrival = now; +} + +/* is this ever used? 
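With the compile-time constants above turned into plain ints, the values become runtime-tunable through the sched_table added to kernel/sysctl.c later in this patch, and show up under /proc/sys/sched as described in the documentation hunk. A small user-space sketch of reading one of the knobs through procfs (error handling kept minimal; the files exist only on kernels carrying this patch):

    #include <stdio.h>

    /* Read an integer scheduler knob from /proc/sys/sched. */
    static long read_knob(const char *name)
    {
        char path[128];
        long val = -1;
        FILE *f;

        snprintf(path, sizeof(path), "/proc/sys/sched/%s", name);
        f = fopen(path, "r");
        if (!f)
            return -1;
        if (fscanf(f, "%ld", &val) != 1)
            val = -1;
        fclose(f);
        return val;
    }

    int main(void)
    {
        printf("max_timeslice = %ld\n", read_knob("max_timeslice"));
        printf("min_timeslice = %ld\n", read_knob("min_timeslice"));
        /* writing works the same way from a root shell, e.g.
         *   echo 150 > /proc/sys/sched/max_timeslice
         */
        return 0;
    }
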
*/ +static inline void sched_info_depart(task_t *t) +{ + struct runqueue *rq = task_rq(t); + unsigned long diff, now = jiffies; + + diff = now - t->sched_info.began_service; + t->sched_info.service_time = + RUNNING_AVG(t->sched_info.service_time, diff); + + if (!rq) + return; + diff = now - rq->info.began_service; + rq->info.service_time = + RUNNING_AVG(rq->info.service_time, diff); +} + +static inline void sched_info_switch(task_t *prev, task_t *next) +{ + struct runqueue *rq = task_rq(prev); + unsigned long diff, now = jiffies; + + /* prev now departs the cpu */ + sched_info_depart(prev); + + /* only for involuntary context switches */ + if (prev->state == TASK_RUNNING) + sched_info_arrive(prev); + + diff = now - next->sched_info.last_arrival; + next->sched_info.response_time = + RUNNING_AVG(next->sched_info.response_time, diff); + next->sched_info.began_service = now; + + if (!rq) + return; + /* yes, reusing next's service time is valid */ + rq->info.response_time = + RUNNING_AVG(rq->info.response_time, diff); + rq->info.began_service = now; + + if (prev->state != TASK_RUNNING) + return; + /* if prev arrived subtract rq's last arrival from its arrival */ + diff = now - rq->info.last_arrival; + rq->info.inter_arrival_time = + RUNNING_AVG(rq->info.inter_arrival_time, diff); + rq->info.last_arrival = now; +} + /* * rq_lock - lock a given runqueue and disable interrupts. */ @@ -492,15 +580,18 @@ repeat_lock_task: (p->cpus_allowed & (1UL << smp_processor_id())))) { set_task_cpu(p, smp_processor_id()); + sched_info_arrive(p); task_rq_unlock(rq, &flags); goto repeat_lock_task; } if (old_state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; - if (sync) + if (sync) { + sched_info_arrive(p); __activate_task(p, rq); - else { + } else { activate_task(p, rq); + sched_info_arrive(p); if (p->prio < rq->curr->prio) resched_task(rq->curr); } @@ -554,6 +645,7 @@ void wake_up_forked_process(task_t * p) p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100; p->prio = effective_prio(p); set_task_cpu(p, smp_processor_id()); + sched_info_arrive(p); if (unlikely(!current->array)) __activate_task(p, rq); @@ -679,6 +771,11 @@ unsigned long nr_running(void) return sum; } +unsigned long nr_running_cpu(int cpu) +{ + return cpu_rq(cpu)->nr_running; +} + unsigned long nr_uninterruptible(void) { unsigned long i, sum = 0; @@ -715,6 +812,11 @@ unsigned long nr_iowait(void) return sum; } +void cpu_sched_info(struct sched_info *info, int cpu) +{ + memcpy(info, &cpu_rq(cpu)->info, sizeof(struct sched_info)); +} + /* * double_rq_lock - safely lock two runqueues * @@ -749,7 +851,7 @@ static inline void double_rq_unlock(runq spin_unlock(&rq2->lock); } -#ifdef CONFIG_NUMA +#ifdef CONFIG_NUMA_SCHED /* * If dest_cpu is allowed for this process, migrate the task to it. * This is accomplished by forcing the cpu_allowed mask to only @@ -774,38 +876,75 @@ static void sched_migrate_task(task_t *p * Find the least loaded CPU. Slightly favor the current CPU by * setting its runqueue length as the minimum to start. */ + static int sched_best_cpu(struct task_struct *p) { - int i, minload, load, best_cpu, node = 0; + int cpu, node, minload, load, best_cpu, best_node; + int this_cpu, this_node, this_node_load; unsigned long cpumask; - best_cpu = task_cpu(p); - if (cpu_rq(best_cpu)->nr_running <= 2) - return best_cpu; + this_cpu = best_cpu = task_cpu(p); + if (cpu_rq(this_cpu)->nr_running <= 2) + return this_cpu; + this_node = best_node = cpu_to_node(this_cpu); + + /* + * First look for any node-local idle queue and use that. 
+ * This improves performance under light loads (mbligh). + * In case this node turns out to be the lightest node, store the best + * cpu that we find, so we don't go sniffing the same runqueues again. + */ + minload = 10000000; + cpumask = node_to_cpumask(this_node); + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + if (!(cpumask & (1UL << cpu))) + continue; + load = cpu_rq(cpu)->nr_running; + if (load == 0) + return cpu; + if (load < minload) { + minload = load; + best_cpu = cpu; + } + } + /* + * Now find the lightest loaded node, and put it in best_node + * + * Node load is always divided by nr_cpus_node to normalise load + * values in case cpu count differs from node to node. We first + * multiply node_nr_running by 16 to get a little better resolution. + */ minload = 10000000; - for_each_node_with_cpus(i) { - /* - * Node load is always divided by nr_cpus_node to normalise - * load values in case cpu count differs from node to node. - * We first multiply node_nr_running by 10 to get a little - * better resolution. - */ - load = 10 * atomic_read(&node_nr_running[i]) / nr_cpus_node(i); + this_node_load = 16 * atomic_read(&node_nr_running[this_node]) + / nr_cpus_node(this_node); + for_each_node_with_cpus(node) { + if (node == this_node) + load = this_node_load; + else + load = 16 * atomic_read(&node_nr_running[node]) + / nr_cpus_node(node); if (load < minload) { minload = load; - node = i; + best_node = node; } } + /* If we chose this node, we already did the legwork earlier */ + if (best_node == this_node) + return best_cpu; + + /* Now find the lightest loaded cpu on best_node, and use that */ minload = 10000000; - cpumask = node_to_cpumask(node); - for (i = 0; i < NR_CPUS; ++i) { - if (!(cpumask & (1UL << i))) + best_cpu = this_cpu; + cpumask = node_to_cpumask(best_node); + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + if (!(cpumask & (1UL << cpu))) continue; - if (cpu_rq(i)->nr_running < minload) { - best_cpu = i; - minload = cpu_rq(i)->nr_running; + load = cpu_rq(cpu)->nr_running; + if (load < minload) { + minload = load; + best_cpu = cpu; } } return best_cpu; @@ -856,7 +995,10 @@ static int find_busiest_node(int this_no return node; } -#endif /* CONFIG_NUMA */ +#endif /* CONFIG_NUMA_SCHED */ + +int idle_node_rebalance_ratio = 10; +int busy_node_rebalance_ratio = 2; #ifdef CONFIG_SMP @@ -1085,10 +1227,10 @@ out: */ #define IDLE_REBALANCE_TICK (HZ/1000 ?: 1) #define BUSY_REBALANCE_TICK (HZ/5 ?: 1) -#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * 5) -#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 2) +#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * idle_node_rebalance_ratio) +#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * busy_node_rebalance_ratio) -#ifdef CONFIG_NUMA +#ifdef CONFIG_NUMA_SCHED static void balance_node(runqueue_t *this_rq, int idle, int this_cpu) { int node = find_busiest_node(cpu_to_node(this_cpu)); @@ -1119,7 +1261,7 @@ static void rebalance_tick(runqueue_t *t * are not balanced.) */ if (idle) { -#ifdef CONFIG_NUMA +#ifdef CONFIG_NUMA_SCHED if (!(j % IDLE_NODE_REBALANCE_TICK)) balance_node(this_rq, idle, this_cpu); #endif @@ -1130,7 +1272,7 @@ static void rebalance_tick(runqueue_t *t } return; } -#ifdef CONFIG_NUMA +#ifdef CONFIG_NUMA_SCHED if (!(j % BUSY_NODE_REBALANCE_TICK)) balance_node(this_rq, idle, this_cpu); #endif @@ -1246,6 +1388,27 @@ void scheduler_tick(int user_ticks, int enqueue_task(p, rq->expired); } else enqueue_task(p, rq->active); + } else { + /* + * Prevent a too long timeslice allowing a task to monopolize + * the CPU. 
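The normalisation in sched_best_cpu() above exists because raw run-queue counts are not comparable between nodes with different CPU counts; multiplying node_nr_running by 16 before the divide keeps some fractional resolution in integer arithmetic. A small worked example, assuming one 4-CPU node and one 2-CPU node (the task counts are made up for illustration):

    #include <stdio.h>

    /* 16 * running tasks / cpus on the node, as in sched_best_cpu() above */
    static int node_load(int nr_running, int nr_cpus)
    {
        return 16 * nr_running / nr_cpus;
    }

    int main(void)
    {
        /* node 0: 4 CPUs, 6 runnable tasks; node 1: 2 CPUs, 4 runnable tasks */
        int load0 = node_load(6, 4);    /* 24, i.e. 1.5 tasks per CPU */
        int load1 = node_load(4, 2);    /* 32, i.e. 2.0 tasks per CPU */

        printf("node0 load=%d node1 load=%d -> prefer node %d\n",
               load0, load1, load0 <= load1 ? 0 : 1);
        return 0;
    }
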
We do this by splitting up the timeslice into + * smaller pieces. + * + * Note: this does not mean the task's timeslices expire or + * get lost in any way, they just might be preempted by + * another task of equal priority. (one with higher + * priority would have preempted this task already.) We + * requeue this task to the end of the list on this priority + * level, which is in essence a round-robin of tasks with + * equal priority. + */ + if (!(p->time_slice % TIMESLICE_GRANULARITY) && + (p->array == rq->active)) { + dequeue_task(p, rq->active); + set_tsk_need_resched(p); + p->prio = effective_prio(p); + enqueue_task(p, rq->active); + } } out_unlock: spin_unlock(&rq->lock); @@ -1258,7 +1421,11 @@ void scheduling_functions_start_here(voi /* * schedule() is the main scheduler function. */ +#ifdef CONFIG_KGDB_THREAD +asmlinkage void do_schedule(void) +#else asmlinkage void schedule(void) +#endif { task_t *prev, *next; runqueue_t *rq; @@ -1339,6 +1506,7 @@ switch_tasks: if (likely(prev != next)) { rq->nr_switches++; + sched_info_switch(prev, next); rq->curr = next; prepare_arch_switch(rq, next); @@ -1485,6 +1653,20 @@ void complete_all(struct completion *x) __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0, 0); spin_unlock_irqrestore(&x->wait.lock, flags); } + +#ifdef CONFIG_KGDB_THREAD +asmlinkage void user_schedule(void) +{ + current->thread.kgdbregs = NULL; + do_schedule(); +} + +asmlinkage void kern_do_schedule(struct pt_regs regs) +{ + current->thread.kgdbregs = ®s; + do_schedule(); +} +#endif void wait_for_completion(struct completion *x) { diff -urpN -X /home/fletch/.diff.exclude 000-virgin/kernel/sys.c 901-mjb1.1/kernel/sys.c --- 000-virgin/kernel/sys.c Tue Aug 5 20:01:56 2003 +++ 901-mjb1.1/kernel/sys.c Wed Aug 13 20:51:50 2003 @@ -235,6 +235,7 @@ cond_syscall(sys_epoll_ctl) cond_syscall(sys_epoll_wait) cond_syscall(sys_pciconfig_read) cond_syscall(sys_pciconfig_write) +cond_syscall(sys_mbind) static int set_one_prio(struct task_struct *p, int niceval, int error) { diff -urpN -X /home/fletch/.diff.exclude 000-virgin/kernel/sysctl.c 901-mjb1.1/kernel/sysctl.c --- 000-virgin/kernel/sysctl.c Wed Aug 13 20:24:33 2003 +++ 901-mjb1.1/kernel/sysctl.c Wed Aug 13 20:28:58 2003 @@ -58,6 +58,18 @@ extern int cad_pid; extern int pid_max; extern int sysctl_lower_zone_protection; extern int min_free_kbytes; +extern int min_timeslice; +extern int max_timeslice; +extern int child_penalty; +extern int parent_penalty; +extern int exit_weight; +extern int prio_bonus_ratio; +extern int interactive_delta; +extern int max_sleep_avg; +extern int starvation_limit; +extern int node_threshold; +extern int idle_node_rebalance_ratio; +extern int busy_node_rebalance_ratio; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; @@ -120,6 +132,7 @@ static struct ctl_table_header root_tabl static ctl_table kern_table[]; static ctl_table vm_table[]; +static ctl_table sched_table[]; #ifdef CONFIG_NET extern ctl_table net_table[]; #endif @@ -199,6 +212,12 @@ static ctl_table root_table[] = { .mode = 0555, .child = dev_table, }, + { + .ctl_name = CTL_SCHED, + .procname = "sched", + .mode = 0555, + .child = sched_table, + }, { .ctl_name = 0 } }; @@ -586,6 +605,7 @@ static ctl_table kern_table[] = { /* Constants for minimum and maximum testing in vm_table. We use these as one-element integer vectors. 
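The TIMESLICE_GRANULARITY requeue added to scheduler_tick() above fires whenever the remaining timeslice crosses a granularity boundary, so a long slice is consumed in several round-robin chunks instead of letting one task monopolise the CPU. A quick arithmetic sketch, assuming HZ=1000 so the granularity (HZ/20) is 50 ticks (the kernel definition uses the GNU "?:" shorthand; the portable form is spelled out here):

    #include <stdio.h>

    #define HZ                    1000                        /* assumed for the example */
    #define TIMESLICE_GRANULARITY ((HZ / 20) ? (HZ / 20) : 1) /* 50 ticks = 50 ms */

    int main(void)
    {
        int slice = 200 * HZ / 1000;    /* a 200 ms timeslice = 200 ticks */
        int remaining;

        /* scheduler_tick() decrements first, then checks the remainder */
        for (remaining = slice - 1; remaining > 0; remaining--)
            if (!(remaining % TIMESLICE_GRANULARITY))
                printf("requeue with %d ticks left\n", remaining);
        return 0;
    }

So a 200-tick slice is handed out in four 50-tick pieces, with the task dropped to the back of its priority level between pieces; the timeslice itself is never shortened.
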
*/ static int zero; +static int one = 1; static int one_hundred = 100; @@ -805,6 +825,48 @@ static ctl_table debug_table[] = { static ctl_table dev_table[] = { { .ctl_name = 0 } }; + +static ctl_table sched_table[] = { + {SCHED_MAX_TIMESLICE, "max_timeslice", &max_timeslice, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &one, NULL}, + {SCHED_MIN_TIMESLICE, "min_timeslice", &min_timeslice, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &one, NULL}, + {SCHED_CHILD_PENALTY, "child_penalty", &child_penalty, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_PARENT_PENALTY, "parent_penalty", &parent_penalty, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_EXIT_WEIGHT, "exit_weight", &exit_weight, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_PRIO_BONUS_RATIO, "prio_bonus_ratio", &prio_bonus_ratio, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_INTERACTIVE_DELTA, "interactive_delta", &interactive_delta, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_MAX_SLEEP_AVG, "max_sleep_avg", &max_sleep_avg, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &one, NULL}, + {SCHED_STARVATION_LIMIT, "starvation_limit", &starvation_limit, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_NODE_THRESHOLD, "node_threshold", &node_threshold, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + sysctl_intvec, NULL, &one, NULL}, + {SCHED_IDLE_NODE_REBALANCE_RATIO, "idle_node_rebalance_ratio", + &idle_node_rebalance_ratio, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_BUSY_NODE_REBALANCE_RATIO, "busy_node_rebalance_ratio", + &busy_node_rebalance_ratio, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &zero, NULL}, + {0} +}; extern void init_irq_proc (void); diff -urpN -X /home/fletch/.diff.exclude 000-virgin/kernel/timer.c 901-mjb1.1/kernel/timer.c --- 000-virgin/kernel/timer.c Wed Aug 13 20:24:33 2003 +++ 901-mjb1.1/kernel/timer.c Wed Aug 13 20:51:40 2003 @@ -756,6 +756,8 @@ static unsigned long count_active_tasks( * Requires xtime_lock to access. */ unsigned long avenrun[3]; +unsigned long tasks_running[3]; +DEFINE_PER_CPU(unsigned long[3],cpu_tasks_running); /* * calc_load - given tick count, update the avenrun load estimates. @@ -763,7 +765,7 @@ unsigned long avenrun[3]; */ static inline void calc_load(unsigned long ticks) { - unsigned long active_tasks; /* fixed-point */ + unsigned long active_tasks, running_tasks; /* fixed-point */ static int count = LOAD_FREQ; count -= ticks; @@ -773,7 +775,37 @@ static inline void calc_load(unsigned lo CALC_LOAD(avenrun[0], EXP_1, active_tasks); CALC_LOAD(avenrun[1], EXP_5, active_tasks); CALC_LOAD(avenrun[2], EXP_15, active_tasks); + running_tasks = nr_running() * FIXED_1; + CALC_LOAD(tasks_running[0], EXP_1, running_tasks); + CALC_LOAD(tasks_running[1], EXP_5, running_tasks); + CALC_LOAD(tasks_running[2], EXP_15, running_tasks); } +} + +/* + * This does the frequency calculation a little bit different from the + * global version above. It doesn't ever look at the kernel's concept + * of time, it just updates that stats every LOAD_FREQ times into the + * function. 
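tasks_running[] above is maintained with the same CALC_LOAD() fixed-point decay the stock kernel uses for avenrun[]; the macro itself lives in a header and is not part of this hunk. A user-space sketch of that decay, assuming the kernel's usual constants FIXED_1 = 2048 and EXP_1 = 1884 (the 1-minute factor), just to show how a runnable-task count turns into a load average over repeated 5-second samples:

    #include <stdio.h>

    #define FSHIFT   11
    #define FIXED_1  (1 << FSHIFT)   /* 2048 */
    #define EXP_1    1884            /* 1-minute decay factor (assumed) */

    /* Same shape as the kernel's CALC_LOAD macro (assumed, not in this hunk). */
    #define CALC_LOAD(load, exp, n) \
        do { (load) *= (exp); (load) += (n) * (FIXED_1 - (exp)); (load) >>= FSHIFT; } while (0)

    int main(void)
    {
        unsigned long avenrun = 0;
        int i;

        /* pretend 3 tasks are runnable at every 5-second sample for 5 minutes */
        for (i = 0; i < 60; i++)
            CALC_LOAD(avenrun, EXP_1, 3UL * FIXED_1);

        printf("load ~= %lu.%02lu\n",
               avenrun >> FSHIFT, ((avenrun & (FIXED_1 - 1)) * 100) >> FSHIFT);
        return 0;
    }
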
+ * + * Using jiffies is more accurate, but there _are_ just statistics, so + * they're not worth messing with xtime_lock and company. If we miss + * an interrupt or two, big deal. + */ +void calc_load_cpu(int cpu) +{ + unsigned long running_tasks; + static DEFINE_PER_CPU(int, count) = { LOAD_FREQ }; + + per_cpu(count, cpu)--; + if (per_cpu(count, cpu) != 0) + return; + + per_cpu(count, cpu) += LOAD_FREQ; + running_tasks = nr_running_cpu(cpu) * FIXED_1; + CALC_LOAD(per_cpu(cpu_tasks_running, cpu)[0], EXP_1, running_tasks); + CALC_LOAD(per_cpu(cpu_tasks_running, cpu)[1], EXP_5, running_tasks); + CALC_LOAD(per_cpu(cpu_tasks_running, cpu)[2], EXP_15, running_tasks); } /* jiffies at the most recent update of wall time */ diff -urpN -X /home/fletch/.diff.exclude 000-virgin/mm/Makefile 901-mjb1.1/mm/Makefile --- 000-virgin/mm/Makefile Thu Feb 13 11:08:15 2003 +++ 901-mjb1.1/mm/Makefile Wed Aug 13 20:51:50 2003 @@ -7,8 +7,10 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ shmem.o vmalloc.o -obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ +obj-y := bootmem.o fadvise.o filemap.o mempool.o oom_kill.o \ page_alloc.o page-writeback.o pdflush.o readahead.o \ slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y) obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o + +obj-$(CONFIG_NUMA) += mbind.o diff -urpN -X /home/fletch/.diff.exclude 000-virgin/mm/filemap.c 901-mjb1.1/mm/filemap.c --- 000-virgin/mm/filemap.c Wed Aug 13 20:24:33 2003 +++ 901-mjb1.1/mm/filemap.c Wed Aug 13 20:29:24 2003 @@ -63,6 +63,9 @@ * ->mmap_sem * ->i_shared_sem (various places) * + * ->lock_page + * ->i_shared_sem (page_convert_anon) + * * ->inode_lock * ->sb_lock (fs/fs-writeback.c) * ->mapping->page_lock (__sync_single_inode) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/mm/fremap.c 901-mjb1.1/mm/fremap.c --- 000-virgin/mm/fremap.c Fri May 30 19:02:24 2003 +++ 901-mjb1.1/mm/fremap.c Wed Aug 13 20:51:52 2003 @@ -36,7 +36,7 @@ static inline int zap_pte(struct mm_stru set_page_dirty(page); page_remove_rmap(page, ptep); page_cache_release(page); - mm->rss--; + dec_rss(mm, page); } } return 1; @@ -60,10 +60,26 @@ int install_page(struct mm_struct *mm, s pgd_t *pgd; pmd_t *pmd; struct pte_chain *pte_chain; + unsigned long pgidx; pte_chain = pte_chain_alloc(GFP_KERNEL); if (!pte_chain) goto err; + + /* + * Convert this page to anon for objrmap if it's nonlinear + */ + pgidx = (addr - vma->vm_start) >> PAGE_SHIFT; + pgidx += vma->vm_pgoff; + pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; + if (!PageAnon(page) && (page->index != pgidx)) { + lock_page(page); + err = page_convert_anon(page); + unlock_page(page); + if (err < 0) + goto err_free; + } + pgd = pgd_offset(mm, addr); spin_lock(&mm->page_table_lock); @@ -77,7 +93,7 @@ int install_page(struct mm_struct *mm, s flush = zap_pte(mm, vma, addr, pte); - mm->rss++; + inc_rss(mm, page); flush_icache_page(vma, page); set_pte(pte, mk_pte(page, prot)); pte_chain = page_add_rmap(page, pte, pte_chain); @@ -85,12 +101,11 @@ int install_page(struct mm_struct *mm, s if (flush) flush_tlb_page(vma, addr); update_mmu_cache(vma, addr, *pte); - spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); - return 0; + err = 0; err_unlock: spin_unlock(&mm->page_table_lock); +err_free: pte_chain_free(pte_chain); err: return err; diff -urpN -X /home/fletch/.diff.exclude 000-virgin/mm/mbind.c 901-mjb1.1/mm/mbind.c --- 000-virgin/mm/mbind.c Wed Dec 31 16:00:00 1969 +++ 901-mjb1.1/mm/mbind.c Wed Aug 13 20:51:50 2003 @@ -0,0 +1,147 @@ +/* + * 
mm/mbind.c + * + * Written by: Matthew Dobson, IBM Corporation + * + * Copyright (C) 2003, IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to + */ +#include +#include +#include +#include +#include + +/* Translate a cpumask to a nodemask */ +static inline void cpumask_to_nodemask(unsigned long * cpumask, unsigned long * nodemask) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) + if (test_bit(i, cpumask)) + set_bit(cpu_to_node(i), nodemask); +} + +/* + * Adds the zones belonging to @pgdat to @zonelist. Returns the next + * index in @zonelist. + */ +static inline int add_node(pg_data_t *pgdat, struct zonelist *zonelist, int zone_num) +{ + int i; + struct zone *zone; + + for (i = MAX_NR_ZONES-1; i >=0 ; i--) { + zone = pgdat->node_zones + i; + if (zone->present_pages) + zonelist->zones[zone_num++] = zone; + } + return zone_num; +} + +/* Builds a binding for a region of memory, based on a bitmask of nodes. */ +static inline int build_binding(unsigned long * nodemask, struct binding *binding) +{ + int node, zone_num; + + memset(binding, 0, sizeof(struct binding)); + + /* Build binding zonelist */ + for (node = 0, zone_num = 0; node < MAX_NUMNODES; node++) + if (test_bit(node, nodemask) && node_online(node)) + zone_num = add_node(NODE_DATA(node), + &binding->zonelist, zone_num); + binding->zonelist.zones[zone_num] = NULL; + + if (zone_num == 0) + /* No zones were added to the zonelist. Let the caller know. */ + return -EINVAL; + + return 0; +} + + +/* + * mbind - Bind a range of a process' VM space to a set of memory blocks according to + * a predefined policy. + * @start: beginning address of memory region to bind + * @len: length of memory region to bind + * @mask_ptr: pointer to bitmask of cpus + * @mask_len: length of the bitmask + * @policy: flag specifying the policy to use for the segment + */ +asmlinkage unsigned long sys_mbind(unsigned long start, unsigned long len, + unsigned long *mask_ptr, unsigned int mask_len, unsigned long policy) +{ + DECLARE_BITMAP(cpu_mask, NR_CPUS); + DECLARE_BITMAP(node_mask, MAX_NUMNODES); + struct vm_area_struct *vma = NULL; + struct address_space *mapping; + int copy_len, error = 0; + + /* Deal with getting cpu_mask from userspace & translating to node_mask */ + CLEAR_BITMAP(cpu_mask, NR_CPUS); + CLEAR_BITMAP(node_mask, MAX_NUMNODES); + copy_len = min(mask_len, (unsigned int)NR_CPUS); + if (copy_from_user(cpu_mask, mask_ptr, (copy_len+7)/8)) { + error = -EFAULT; + goto out; + } + cpumask_to_nodemask(cpu_mask, node_mask); + + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, start); + up_read(¤t->mm->mmap_sem); + /* This is an ugly, gross hack. This is purely because I've hurt my + * brain trying to come up with a brilliant way of implementing this + * for VMA's in general. 
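sys_mbind() above takes a user-supplied cpu bitmask, folds it into a node mask, and attaches a zonelist binding to the shared-memory segment backing the given address range. A hedged user-space sketch of invoking it follows; NR_mbind_patched is a placeholder (the real number is assigned by this patch's arch changes and is not known here, and this call is unrelated to the later mainline mbind(2), which has a different ABI), and the policy argument is passed through unused:

    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>
    #include <unistd.h>

    /* Placeholder only: fill in the number this patch assigns on your arch.
     * With -1 the call harmlessly fails with ENOSYS on unpatched kernels. */
    #define NR_mbind_patched  (-1L)

    int main(void)
    {
        unsigned long cpu_mask = 0x1;            /* CPUs of interest: cpu 0 */
        size_t len = 1 << 20;                    /* 1 MB shm segment */
        int shmid = shmget(IPC_PRIVATE, len, IPC_CREAT | 0600);
        void *addr;
        long ret;

        if (shmid < 0) {
            perror("shmget");
            return 1;
        }
        addr = shmat(shmid, NULL, 0);
        if (addr == (void *)-1) {
            perror("shmat");
            return 1;
        }

        /* bind the shm segment's pages to the nodes holding the masked CPUs */
        ret = syscall(NR_mbind_patched, (unsigned long)addr, len,
                      &cpu_mask, 8 * sizeof(cpu_mask), 0 /* policy, unused */);
        printf("mbind returned %ld\n", ret);

        shmdt(addr);
        shmctl(shmid, IPC_RMID, NULL);
        return 0;
    }
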
Shared Memory VMA's lend themselves to binding + * both because of how they're implemented, and their actual uses. + * If anyone has a great place to squirrel-away some data about the + * requested binding, and a way to easily force the allocator to respect + * these bindings, then send a patch, or let me know. Otherwise, this + * will have to wait for a stroke of insight. + */ + if (!(vma && vma->vm_file && vma->vm_ops && + vma->vm_ops->nopage == shmem_nopage)) { + /* This isn't a shm segment. For now, we bail. */ + error = -EINVAL; + goto out; + } + + mapping = vma->vm_file->f_dentry->d_inode->i_mapping; + if (mapping->binding) { + kfree(mapping->binding); + mapping->binding = NULL; + } + mapping->binding = kmalloc(sizeof(struct binding), GFP_KERNEL); + if (!mapping->binding) { + error = -ENOMEM; + goto out; + } + error = build_binding(node_mask, mapping->binding); + if (error) { + kfree(mapping->binding); + mapping->binding = NULL; + } + +out: + return error; +} diff -urpN -X /home/fletch/.diff.exclude 000-virgin/mm/memory.c 901-mjb1.1/mm/memory.c --- 000-virgin/mm/memory.c Wed Aug 13 20:24:33 2003 +++ 901-mjb1.1/mm/memory.c Wed Aug 13 20:51:52 2003 @@ -100,10 +100,10 @@ static inline void free_one_pmd(struct m pte_free_tlb(tlb, page); } -static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir) +static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * pgd, unsigned long pgdi) { - int j; - pmd_t * pmd; + pmd_t * pmd, * md, * emd; + pgd_t *dir = pgd + pgdi; if (pgd_none(*dir)) return; @@ -114,8 +114,21 @@ static inline void free_one_pgd(struct m } pmd = pmd_offset(dir, 0); pgd_clear(dir); - for (j = 0; j < PTRS_PER_PMD ; j++) - free_one_pmd(tlb, pmd+j); + /* + * Beware if changing the loop below. It once used int j, + * for (j = 0; j < PTRS_PER_PMD; j++) + * free_one_pmd(pmd+j); + * but some older i386 compilers (e.g. egcs-2.91.66, gcc-2.95.3) + * terminated the loop with a _signed_ address comparison + * using "jle", when configured for HIGHMEM64GB (X86_PAE). + * If also configured for 3GB of kernel virtual address space, + * if page at physical 0x3ffff000 virtual 0x7ffff000 is used as + * a pmd, when that mm exits the loop goes on to free "entries" + * found at 0x80000000 onwards. The loop below compiles instead + * to be terminated by unsigned address comparison using "jb". + */ + for (md = pmd, emd = pmd + USER_PTRS_PER_PMD(pgdi); md < emd; md++) + free_one_pmd(tlb,md); pmd_free_tlb(tlb, pmd); } @@ -128,11 +141,11 @@ static inline void free_one_pgd(struct m void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr) { pgd_t * page_dir = tlb->mm->pgd; - - page_dir += first; + int index = first; + do { - free_one_pgd(tlb, page_dir); - page_dir++; + free_one_pgd(tlb, page_dir, index); + index++; } while (--nr); } @@ -319,7 +332,7 @@ skip_copy_pte_range: pte = pte_mkclean(pte); pte = pte_mkold(pte); get_page(page); - dst->rss++; + inc_rss(dst, page); set_pte(dst_pte, pte); pte_chain = page_add_rmap(page, dst_pte, @@ -411,7 +424,14 @@ zap_pte_range(struct mmu_gather *tlb, pm if (page->mapping && pte_young(pte) && !PageSwapCache(page)) mark_page_accessed(page); - tlb->freed++; + /* + * While we have the page that is being + * freed handy, make sure we decrement + * the mm's RSS accordingly. This is + * only important for NUMA per-node + * RSS accounting. 
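The rss manipulations in the mm/ hunks go through inc_rss()/dec_rss()/zero_rss() helpers whose definitions are not included in this excerpt. The sketch below shows one plausible shape for them, purely as an assumption for illustration: the usual global counter plus a per-node counter indexed by the page's home node, which is what makes the per-node RSS accounting mentioned above possible.

    #include <stdio.h>

    #define MAX_NUMNODES 4

    /* toy stand-ins for struct mm_struct and struct page */
    struct mm {
        unsigned long rss;
        unsigned long node_rss[MAX_NUMNODES];
    };
    struct page { int nid; };

    static int page_to_nid(struct page *page) { return page->nid; }

    /* plausible shape of the helpers used by this patch (assumption) */
    static void inc_rss(struct mm *mm, struct page *page)
    {
        mm->rss++;
        mm->node_rss[page_to_nid(page)]++;
    }

    static void dec_rss(struct mm *mm, struct page *page)
    {
        mm->rss--;
        mm->node_rss[page_to_nid(page)]--;
    }

    int main(void)
    {
        struct mm mm = { 0, { 0 } };
        struct page p0 = { 0 }, p1 = { 1 };

        inc_rss(&mm, &p0);
        inc_rss(&mm, &p1);
        dec_rss(&mm, &p0);
        printf("rss=%lu node0=%lu node1=%lu\n",
               mm.rss, mm.node_rss[0], mm.node_rss[1]);
        return 0;
    }
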
+ */ + dec_rss(tlb->mm, page); page_remove_rmap(page, ptep); tlb_remove_page(tlb, page); } @@ -1037,9 +1057,10 @@ static int do_wp_page(struct mm_struct * page_table = pte_offset_map(pmd, address); if (pte_same(*page_table, pte)) { if (PageReserved(old_page)) - ++mm->rss; + inc_rss(mm, new_page); page_remove_rmap(old_page, page_table); break_cow(vma, new_page, address, page_table); + SetPageAnon(new_page); pte_chain = page_add_rmap(new_page, page_table, pte_chain); lru_cache_add_active(new_page); @@ -1270,7 +1291,7 @@ static int do_swap_page(struct mm_struct if (vm_swap_full()) remove_exclusive_swap_page(page); - mm->rss++; + inc_rss(mm, page); pte = mk_pte(page, vma->vm_page_prot); if (write_access && can_share_swap_page(page)) pte = pte_mkdirty(pte_mkwrite(pte)); @@ -1278,6 +1299,7 @@ static int do_swap_page(struct mm_struct flush_icache_page(vma, page); set_pte(page_table, pte); + SetPageAnon(page); pte_chain = page_add_rmap(page, page_table, pte_chain); /* No need to invalidate - it was non-present before */ @@ -1339,10 +1361,11 @@ do_anonymous_page(struct mm_struct *mm, ret = VM_FAULT_MINOR; goto out; } - mm->rss++; + inc_rss(mm, page); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); lru_cache_add_active(page); mark_page_accessed(page); + SetPageAnon(page); } set_pte(page_table, entry); @@ -1408,6 +1431,10 @@ retry: if (!pte_chain) goto oom; + /* See if nopage returned an anon page */ + if (!new_page->mapping || PageSwapCache(new_page)) + SetPageAnon(new_page); + /* * Should we do an early C-O-W break? */ @@ -1420,6 +1447,7 @@ retry: copy_user_highpage(page, new_page, address); page_cache_release(new_page); lru_cache_add_active(page); + SetPageAnon(page); new_page = page; } @@ -1449,7 +1477,7 @@ retry: */ /* Only go through if we didn't race with anybody else... */ if (pte_none(*page_table)) { - ++mm->rss; + inc_rss(mm, new_page); flush_icache_page(vma, new_page); entry = mk_pte(new_page, vma->vm_page_prot); if (write_access) diff -urpN -X /home/fletch/.diff.exclude 000-virgin/mm/mmap.c 901-mjb1.1/mm/mmap.c --- 000-virgin/mm/mmap.c Tue Aug 5 20:01:43 2003 +++ 901-mjb1.1/mm/mmap.c Wed Aug 13 20:51:52 2003 @@ -291,9 +291,7 @@ static void vma_link(struct mm_struct *m if (mapping) down(&mapping->i_shared_sem); - spin_lock(&mm->page_table_lock); __vma_link(mm, vma, prev, rb_link, rb_parent); - spin_unlock(&mm->page_table_lock); if (mapping) up(&mapping->i_shared_sem); @@ -322,6 +320,25 @@ static inline int is_mergeable_vma(struc return 1; } +static void move_vma_start(struct vm_area_struct *vma, unsigned long addr) +{ + struct inode *inode = NULL; + + if (vma->vm_file) { + inode = vma->vm_file->f_dentry->d_inode; + down(&inode->i_mapping->i_shared_sem); + } + if (inode) + __remove_shared_vm_struct(vma, inode); + /* If no vm_file, perhaps we should always keep vm_pgoff at 0?? */ + vma->vm_pgoff += (long)(addr - vma->vm_start) >> PAGE_SHIFT; + vma->vm_start = addr; + if (inode) { + __vma_link_file(vma); + up(&inode->i_mapping->i_shared_sem); + } +} + /* * Return true if we can merge this (vm_flags,file,vm_pgoff,size) * in front of (at a lower virtual address and file offset than) the vma. @@ -374,8 +391,6 @@ static int vma_merge(struct mm_struct *m unsigned long end, unsigned long vm_flags, struct file *file, unsigned long pgoff) { - spinlock_t * lock = &mm->page_table_lock; - /* * We later require that vma->vm_flags == vm_flags, so this tests * vma->vm_flags & VM_SPECIAL, too. 
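move_vma_start() above keeps the file offset consistent when a vma's start address moves: vm_pgoff must advance by exactly the number of pages the start moved forward. A tiny worked example of that bookkeeping (addresses and offsets chosen for illustration):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    struct vma { unsigned long vm_start, vm_end, vm_pgoff; };

    /* the arithmetic at the heart of move_vma_start() */
    static void move_vma_start(struct vma *vma, unsigned long addr)
    {
        vma->vm_pgoff += (long)(addr - vma->vm_start) >> PAGE_SHIFT;
        vma->vm_start = addr;
    }

    int main(void)
    {
        /* a mapping of file pages 10.. starting at 0x40000000 */
        struct vma v = { 0x40000000UL, 0x40010000UL, 10 };

        move_vma_start(&v, v.vm_start + 4 * PAGE_SIZE);
        printf("vm_start=%#lx vm_pgoff=%lu\n", v.vm_start, v.vm_pgoff);
        /* the page now at vm_start still maps file page vm_pgoff, i.e. 14 */
        return 0;
    }
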
@@ -403,7 +418,6 @@ static int vma_merge(struct mm_struct *m down(&inode->i_mapping->i_shared_sem); need_up = 1; } - spin_lock(lock); prev->vm_end = end; /* @@ -416,7 +430,6 @@ static int vma_merge(struct mm_struct *m prev->vm_end = next->vm_end; __vma_unlink(mm, next, prev); __remove_shared_vm_struct(next, inode); - spin_unlock(lock); if (need_up) up(&inode->i_mapping->i_shared_sem); if (file) @@ -426,7 +439,6 @@ static int vma_merge(struct mm_struct *m kmem_cache_free(vm_area_cachep, next); return 1; } - spin_unlock(lock); if (need_up) up(&inode->i_mapping->i_shared_sem); return 1; @@ -442,10 +454,7 @@ static int vma_merge(struct mm_struct *m pgoff, (end - addr) >> PAGE_SHIFT)) return 0; if (end == prev->vm_start) { - spin_lock(lock); - prev->vm_start = addr; - prev->vm_pgoff -= (end - addr) >> PAGE_SHIFT; - spin_unlock(lock); + move_vma_start(prev, addr); return 1; } } @@ -891,19 +900,16 @@ int expand_stack(struct vm_area_struct * */ address += 4 + PAGE_SIZE - 1; address &= PAGE_MASK; - spin_lock(&vma->vm_mm->page_table_lock); grow = (address - vma->vm_end) >> PAGE_SHIFT; /* Overcommit.. */ if (security_vm_enough_memory(grow)) { - spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur || ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) { - spin_unlock(&vma->vm_mm->page_table_lock); vm_unacct_memory(grow); return -ENOMEM; } @@ -911,7 +917,6 @@ int expand_stack(struct vm_area_struct * vma->vm_mm->total_vm += grow; if (vma->vm_flags & VM_LOCKED) vma->vm_mm->locked_vm += grow; - spin_unlock(&vma->vm_mm->page_table_lock); return 0; } @@ -945,19 +950,16 @@ int expand_stack(struct vm_area_struct * * the spinlock only before relocating the vma range ourself. */ address &= PAGE_MASK; - spin_lock(&vma->vm_mm->page_table_lock); grow = (vma->vm_start - address) >> PAGE_SHIFT; /* Overcommit.. */ if (security_vm_enough_memory(grow)) { - spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur || ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) { - spin_unlock(&vma->vm_mm->page_table_lock); vm_unacct_memory(grow); return -ENOMEM; } @@ -966,7 +968,6 @@ int expand_stack(struct vm_area_struct * vma->vm_mm->total_vm += grow; if (vma->vm_flags & VM_LOCKED) vma->vm_mm->locked_vm += grow; - spin_unlock(&vma->vm_mm->page_table_lock); return 0; } @@ -1129,8 +1130,6 @@ static void unmap_region(struct mm_struc /* * Create a list of vma's touched by the unmap, removing them from the mm's * vma list as we go.. - * - * Called with the page_table_lock held. 
*/ static void detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, @@ -1174,8 +1173,7 @@ int split_vma(struct mm_struct * mm, str if (new_below) { new->vm_end = addr; - vma->vm_start = addr; - vma->vm_pgoff += ((addr - new->vm_start) >> PAGE_SHIFT); + move_vma_start(vma, addr); } else { vma->vm_end = addr; new->vm_start = addr; @@ -1254,8 +1252,8 @@ int do_munmap(struct mm_struct *mm, unsi /* * Remove the vma's, and unmap the actual pages */ - spin_lock(&mm->page_table_lock); detach_vmas_to_be_unmapped(mm, mpnt, prev, end); + spin_lock(&mm->page_table_lock); unmap_region(mm, mpnt, prev, start, end); spin_unlock(&mm->page_table_lock); @@ -1404,7 +1402,7 @@ void exit_mmap(struct mm_struct *mm) vma = mm->mmap; mm->mmap = mm->mmap_cache = NULL; mm->mm_rb = RB_ROOT; - mm->rss = 0; + zero_rss(mm); mm->total_vm = 0; mm->locked_vm = 0; diff -urpN -X /home/fletch/.diff.exclude 000-virgin/mm/page_alloc.c 901-mjb1.1/mm/page_alloc.c --- 000-virgin/mm/page_alloc.c Wed Aug 13 20:24:33 2003 +++ 901-mjb1.1/mm/page_alloc.c Wed Aug 13 21:09:02 2003 @@ -224,6 +224,8 @@ static inline void free_pages_check(cons bad_page(function, page); if (PageDirty(page)) ClearPageDirty(page); + if (PageAnon(page)) + ClearPageAnon(page); } /* @@ -558,7 +560,11 @@ __alloc_pages(unsigned int gfp_mask, uns min = 1UL << order; for (i = 0; zones[i] != NULL; i++) { struct zone *z = zones[i]; - + + if ((__GFP_NODE_STRICT & gfp_mask) && + (pfn_to_nid(z->zone_start_pfn) != numa_node_id())) + continue; + min += z->pages_low; if (z->free_pages >= min || (!wait && z->free_pages >= z->pages_high)) { diff -urpN -X /home/fletch/.diff.exclude 000-virgin/mm/rmap.c 901-mjb1.1/mm/rmap.c --- 000-virgin/mm/rmap.c Wed Jul 2 21:59:16 2003 +++ 901-mjb1.1/mm/rmap.c Wed Aug 13 20:51:53 2003 @@ -102,6 +102,136 @@ pte_chain_encode(struct pte_chain *pte_c **/ /** + * find_pte - Find a pte pointer given a vma and a struct page. + * @vma: the vma to search + * @page: the page to find + * + * Determine if this page is mapped in this vma. If it is, map and rethrn + * the pte pointer associated with it. Return null if the page is not + * mapped in this vma for any reason. + * + * This is strictly an internal helper function for the object-based rmap + * functions. + * + * It is the caller's responsibility to unmap the pte if it is returned. + */ +static inline pte_t * +find_pte(struct vm_area_struct *vma, struct page *page, unsigned long *addr) +{ + struct mm_struct *mm = vma->vm_mm; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + unsigned long loffset; + unsigned long address; + + loffset = (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)); + address = vma->vm_start + ((loffset - vma->vm_pgoff) << PAGE_SHIFT); + if (address < vma->vm_start || address >= vma->vm_end) + goto out; + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto out; + + pmd = pmd_offset(pgd, address); + if (!pmd_present(*pmd)) + goto out; + + pte = pte_offset_map(pmd, address); + if (!pte_present(*pte)) + goto out_unmap; + + if (page_to_pfn(page) != pte_pfn(*pte)) + goto out_unmap; + + if (addr) + *addr = address; + + return pte; + +out_unmap: + pte_unmap(pte); +out: + return NULL; +} + +/** + * page_referenced_obj_one - referenced check for object-based rmap + * @vma: the vma to look in. + * @page: the page we're working on. + * + * Find a pte entry for a page/vma pair, then check and clear the referenced + * bit. + * + * This is strictly a helper function for page_referenced_obj. 
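find_pte() above recovers a page's user virtual address from the vma rather than from a pte_chain: the page's file index is converted to page units, offset by vm_pgoff, and added to vm_start. A short worked example of that index-to-address computation (PAGE_CACHE_SHIFT equal to PAGE_SHIFT, i.e. 4K page-cache pages, is assumed):

    #include <stdio.h>

    #define PAGE_SHIFT        12
    #define PAGE_CACHE_SHIFT  12   /* assumed equal to PAGE_SHIFT here */

    /* same computation as in find_pte() above */
    static unsigned long page_address_in_vma(unsigned long index,
                                             unsigned long vm_start,
                                             unsigned long vm_pgoff)
    {
        unsigned long loffset = index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);

        return vm_start + ((loffset - vm_pgoff) << PAGE_SHIFT);
    }

    int main(void)
    {
        /* a vma maps file pages 16.. at 0x08050000; where does file page 20 live? */
        unsigned long addr = page_address_in_vma(20, 0x08050000UL, 16);

        printf("file page 20 -> %#lx\n", addr);   /* 0x08054000 */
        return 0;
    }
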
+ */ +static int +page_referenced_obj_one(struct vm_area_struct *vma, struct page *page) +{ + struct mm_struct *mm = vma->vm_mm; + pte_t *pte; + int referenced = 0; + + if (!spin_trylock(&mm->page_table_lock)) + return 1; + + pte = find_pte(vma, page, NULL); + if (pte) { + if (ptep_test_and_clear_young(pte)) + referenced++; + pte_unmap(pte); + } + + spin_unlock(&mm->page_table_lock); + return referenced; +} + +/** + * page_referenced_obj_one - referenced check for object-based rmap + * @page: the page we're checking references on. + * + * For an object-based mapped page, find all the places it is mapped and + * check/clear the referenced flag. This is done by following the page->mapping + * pointer, then walking the chain of vmas it holds. It returns the number + * of references it found. + * + * This function is only called from page_referenced for object-based pages. + * + * The semaphore address_space->i_shared_sem is tried. If it can't be gotten, + * assume a reference count of 1. + */ +static int +page_referenced_obj(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct vm_area_struct *vma; + int referenced = 0; + + if (!page->pte.mapcount) + return 0; + + if (!mapping) + BUG(); + + if (PageSwapCache(page)) + BUG(); + + if (down_trylock(&mapping->i_shared_sem)) + return 1; + + list_for_each_entry(vma, &mapping->i_mmap, shared) + referenced += page_referenced_obj_one(vma, page); + + list_for_each_entry(vma, &mapping->i_mmap_shared, shared) + referenced += page_referenced_obj_one(vma, page); + + up(&mapping->i_shared_sem); + + return referenced; +} + +/** * page_referenced - test if the page was referenced * @page: the page to test * @@ -120,6 +250,10 @@ int page_referenced(struct page * page) if (TestClearPageReferenced(page)) referenced++; + if (!PageAnon(page)) { + referenced += page_referenced_obj(page); + goto out; + } if (PageDirect(page)) { pte_t *pte = rmap_ptep_map(page->pte.direct); if (ptep_test_and_clear_young(pte)) @@ -153,6 +287,7 @@ int page_referenced(struct page * page) __pte_chain_free(pc); } } +out: return referenced; } @@ -175,6 +310,21 @@ page_add_rmap(struct page *page, pte_t * pte_chain_lock(page); + /* + * If this is an object-based page, just count it. We can + * find the mappings by walking the object vma chain for that object. + */ + if (!PageAnon(page)) { + if (!page->mapping) + BUG(); + if (PageSwapCache(page)) + BUG(); + if (!page->pte.mapcount) + inc_page_state(nr_mapped); + page->pte.mapcount++; + goto out; + } + if (page->pte.direct == 0) { page->pte.direct = pte_paddr; SetPageDirect(page); @@ -231,8 +381,25 @@ void page_remove_rmap(struct page *page, pte_chain_lock(page); if (!page_mapped(page)) - goto out_unlock; /* remap_page_range() from a driver? */ + goto out_unlock; + /* + * If this is an object-based page, just uncount it. We can + * find the mappings by walking the object vma chain for that object. + */ + if (!PageAnon(page)) { + if (!page->mapping) + BUG(); + if (PageSwapCache(page)) + BUG(); + if (!page->pte.mapcount) + BUG(); + page->pte.mapcount--; + if (!page->pte.mapcount) + dec_page_state(nr_mapped); + goto out_unlock; + } + if (PageDirect(page)) { if (page->pte.direct == pte_paddr) { page->pte.direct = 0; @@ -279,6 +446,102 @@ out_unlock: } /** + * try_to_unmap_obj - unmap a page using the object-based rmap method + * @page: the page to unmap + * + * Determine whether a page is mapped in a given vma and unmap it if it's found. + * + * This function is strictly a helper function for try_to_unmap_obj. 
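For file-backed (non-anon) pages the patch replaces pte_chains with a bare mapcount, and nr_mapped is only touched on the 0->1 and 1->0 transitions so the global statistic keeps counting pages, not individual mappings. A toy illustration of that counting discipline (the names are stand-ins, not the kernel's):

    #include <stdio.h>

    static long nr_mapped;          /* stand-in for the global page state */

    struct page { unsigned long mapcount; };

    static void map_page(struct page *page)
    {
        if (!page->mapcount)
            nr_mapped++;            /* first mapping of this page */
        page->mapcount++;
    }

    static void unmap_page(struct page *page)
    {
        page->mapcount--;
        if (!page->mapcount)
            nr_mapped--;            /* last mapping just went away */
    }

    int main(void)
    {
        struct page p = { 0 };

        map_page(&p);               /* nr_mapped: 1 */
        map_page(&p);               /* still 1: same page, second mapping */
        unmap_page(&p);             /* still 1 */
        unmap_page(&p);             /* back to 0 */
        printf("nr_mapped = %ld, mapcount = %lu\n", nr_mapped, p.mapcount);
        return 0;
    }
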
+ */ +static inline int +try_to_unmap_obj_one(struct vm_area_struct *vma, struct page *page) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long address; + pte_t *pte; + pte_t pteval; + int ret = SWAP_AGAIN; + + if (!spin_trylock(&mm->page_table_lock)) + return ret; + + pte = find_pte(vma, page, &address); + if (!pte) + goto out; + + if (vma->vm_flags & VM_LOCKED) { + ret = SWAP_FAIL; + goto out_unmap; + } + + flush_cache_page(vma, address); + pteval = ptep_get_and_clear(pte); + flush_tlb_page(vma, address); + + if (pte_dirty(pteval)) + set_page_dirty(page); + + if (!page->pte.mapcount) + BUG(); + + mm->rss--; + page->pte.mapcount--; + page_cache_release(page); + +out_unmap: + pte_unmap(pte); + +out: + spin_unlock(&mm->page_table_lock); + return ret; +} + +/** + * try_to_unmap_obj - unmap a page using the object-based rmap method + * @page: the page to unmap + * + * Find all the mappings of a page using the mapping pointer and the vma chains + * contained in the address_space struct it points to. + * + * This function is only called from try_to_unmap for object-based pages. + * + * The semaphore address_space->i_shared_sem is tried. If it can't be gotten, + * return a temporary error. + */ +static int +try_to_unmap_obj(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct vm_area_struct *vma; + int ret = SWAP_AGAIN; + + if (!mapping) + BUG(); + + if (PageSwapCache(page)) + BUG(); + + if (down_trylock(&mapping->i_shared_sem)) + return ret; + + list_for_each_entry(vma, &mapping->i_mmap, shared) { + ret = try_to_unmap_obj_one(vma, page); + if (ret == SWAP_FAIL || !page->pte.mapcount) + goto out; + } + + list_for_each_entry(vma, &mapping->i_mmap_shared, shared) { + ret = try_to_unmap_obj_one(vma, page); + if (ret == SWAP_FAIL || !page->pte.mapcount) + goto out; + } + +out: + up(&mapping->i_shared_sem); + return ret; +} + +/** * try_to_unmap_one - worker function for try_to_unmap * @page: page to unmap * @ptep: page table entry to unmap from page @@ -360,7 +623,7 @@ static int try_to_unmap_one(struct page if (pte_dirty(pte)) set_page_dirty(page); - mm->rss--; + dec_rss(mm, page); page_cache_release(page); ret = SWAP_SUCCESS; @@ -397,6 +660,15 @@ int try_to_unmap(struct page * page) if (!page->mapping) BUG(); + /* + * If it's an object-based page, use the object vma chain to find all + * the mappings. + */ + if (!PageAnon(page)) { + ret = try_to_unmap_obj(page); + goto out; + } + if (PageDirect(page)) { ret = try_to_unmap_one(page, page->pte.direct); if (ret == SWAP_SUCCESS) { @@ -452,9 +724,112 @@ int try_to_unmap(struct page * page) } } out: - if (!page_mapped(page)) + if (!page_mapped(page)) { dec_page_state(nr_mapped); + ret = SWAP_SUCCESS; + } return ret; +} + +/** + * page_convert_anon - Convert an object-based mapped page to pte_chain-based. + * @page: the page to convert + * + * Find all the mappings for an object-based page and convert them + * to 'anonymous', ie create a pte_chain and store all the pte pointers there. + * + * This function takes the address_space->i_shared_sem, sets the PageAnon flag, + * then sets the mm->page_table_lock for each vma and calls page_add_rmap. This + * means there is a period when PageAnon is set, but still has some mappings + * with no pte_chain entry. This is in fact safe, since page_remove_rmap will + * simply not find it. try_to_unmap might erroneously return success, but it + * will never be called because the page_convert_anon() caller has locked the + * page. 
+ * + * page_referenced() may fail to scan all the appropriate pte's and may return + * an inaccurate result. This is so rare that it does not matter. + */ +int page_convert_anon(struct page *page) +{ + struct address_space *mapping; + struct vm_area_struct *vma; + struct pte_chain *pte_chain = NULL; + pte_t *pte; + int err = 0; + + mapping = page->mapping; + if (mapping == NULL) + goto out; /* truncate won the lock_page() race */ + + down(&mapping->i_shared_sem); + pte_chain_lock(page); + + /* + * Has someone else done it for us before we got the lock? + * If so, pte.direct or pte.chain has replaced pte.mapcount. + */ + if (PageAnon(page)) { + pte_chain_unlock(page); + goto out_unlock; + } + + SetPageAnon(page); + if (page->pte.mapcount == 0) { + pte_chain_unlock(page); + goto out_unlock; + } + /* This is gonna get incremented by page_add_rmap */ + dec_page_state(nr_mapped); + page->pte.mapcount = 0; + + /* + * Now that the page is marked as anon, unlock it. page_add_rmap will + * lock it as necessary. + */ + pte_chain_unlock(page); + + list_for_each_entry(vma, &mapping->i_mmap, shared) { + if (!pte_chain) { + pte_chain = pte_chain_alloc(GFP_KERNEL); + if (!pte_chain) { + err = -ENOMEM; + goto out_unlock; + } + } + spin_lock(&vma->vm_mm->page_table_lock); + pte = find_pte(vma, page, NULL); + if (pte) { + /* Make sure this isn't a duplicate */ + page_remove_rmap(page, pte); + pte_chain = page_add_rmap(page, pte, pte_chain); + pte_unmap(pte); + } + spin_unlock(&vma->vm_mm->page_table_lock); + } + list_for_each_entry(vma, &mapping->i_mmap_shared, shared) { + if (!pte_chain) { + pte_chain = pte_chain_alloc(GFP_KERNEL); + if (!pte_chain) { + err = -ENOMEM; + goto out_unlock; + } + } + spin_lock(&vma->vm_mm->page_table_lock); + pte = find_pte(vma, page, NULL); + if (pte) { + /* Make sure this isn't a duplicate */ + page_remove_rmap(page, pte); + pte_chain = page_add_rmap(page, pte, pte_chain); + pte_unmap(pte); + } + spin_unlock(&vma->vm_mm->page_table_lock); + } + +out_unlock: + pte_chain_free(pte_chain); + up(&mapping->i_shared_sem); +out: + return err; } /** diff -urpN -X /home/fletch/.diff.exclude 000-virgin/mm/swapfile.c 901-mjb1.1/mm/swapfile.c --- 000-virgin/mm/swapfile.c Wed Aug 13 20:24:33 2003 +++ 901-mjb1.1/mm/swapfile.c Wed Aug 13 20:51:53 2003 @@ -386,9 +386,10 @@ static void unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir, swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) { - vma->vm_mm->rss++; + inc_rss(vma->vm_mm, page); get_page(page); set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot))); + SetPageAnon(page); *pte_chainp = page_add_rmap(page, dir, *pte_chainp); swap_free(entry); } @@ -497,6 +498,7 @@ static int unuse_process(struct mm_struc /* * Go through process' page directory. 
*/ + down_read(&mm->mmap_sem); spin_lock(&mm->page_table_lock); for (vma = mm->mmap; vma; vma = vma->vm_next) { pgd_t * pgd = pgd_offset(mm, vma->vm_start); @@ -504,6 +506,7 @@ static int unuse_process(struct mm_struc break; } spin_unlock(&mm->page_table_lock); + up_read(&mm->mmap_sem); pte_chain_free(pte_chain); return 0; } diff -urpN -X /home/fletch/.diff.exclude 000-virgin/scripts/Makefile.build 901-mjb1.1/scripts/Makefile.build --- 000-virgin/scripts/Makefile.build Sat Jun 14 18:37:41 2003 +++ 901-mjb1.1/scripts/Makefile.build Wed Aug 13 20:51:56 2003 @@ -119,7 +119,16 @@ cmd_cc_i_c = $(CPP) $(c_flags) - quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ ifndef CONFIG_MODVERSIONS -cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< +new1_c_flags = $(c_flags:-I%=-I$(TOPDIR)/%) +new2_c_flags = $(new1_c_flags:-Wp%=) +PWD = $(TOPDIR) + +quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ +cmd_cc_o_c = $(CC) $(c_flags) -E -o $@ $< \ + && cd $(dir $<) \ + && $(CC) $(new2_c_flags) -c -o $(notdir $@) $(notdir $<) \ + && cd $(TOPDIR) +#cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< else # When module versioning is enabled the following steps are executed: @@ -134,12 +143,21 @@ else # replace the unresolved symbols __crc_exported_symbol with # the actual value of the checksum generated by genksyms -cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $< +new1_c_flags = $(c_flags:-I%=-I$(TOPDIR)/%) +new2_c_flags = $(new1_c_flags:-Wp%=) +PWD = $(TOPDIR) + +quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ +cmd_cc_o_c = $(CC) $(c_flags) -E -o $@ $< \ + && cd $(dir $<) \ + && $(CC) $(new2_c_flags) -c -o .tmp_$(@F) $(notdir $<) \ + && cd $(TOPDIR) +#cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $< cmd_modversions = \ if ! $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ mv $(@D)/.tmp_$(@F) $@; \ else \ - $(CPP) -D__GENKSYMS__ $(c_flags) $< \ + $(CPP) -D__GENKSYMS__ $(new2_c_flags) $< \ | $(GENKSYMS) \ > $(@D)/.tmp_$(@F:.o=.ver); \ \