--- linux-2.6.3/arch/alpha/Kconfig 2004-02-03 20:42:34.000000000 -0800
+++ 25/arch/alpha/Kconfig 2004-02-20 00:20:34.000000000 -0800
@@ -569,24 +569,6 @@ config VERBOSE_MCHECK_ON
source "drivers/pci/Kconfig"
source "drivers/eisa/Kconfig"
-config HOTPLUG
- bool "Support for hot-pluggable devices"
- ---help---
- Say Y here if you want to plug devices into your computer while
- the system is running, and be able to use them quickly. In many
- cases, the devices can likewise be unplugged at any time too.
-
- One well known example of this is PCMCIA- or PC-cards, credit-card
- size devices such as network cards, modems or hard drives which are
- plugged into slots found on all modern laptop computers. Another
- example, used on modern desktops as well as laptops, is USB.
-
- Enable HOTPLUG and KMOD, and build a modular kernel. Get agent
- software (at <http://linux-hotplug.sourceforge.net/>) and install it.
- Then your kernel will automatically call out to a user mode "policy
- agent" (/sbin/hotplug) to load modules and set up software needed
- to use devices as you hotplug them.
-
source "drivers/pcmcia/Kconfig"
config SRM_ENV
--- linux-2.6.3/arch/alpha/kernel/alpha_ksyms.c 2003-06-22 12:04:43.000000000 -0700
+++ 25/arch/alpha/kernel/alpha_ksyms.c 2004-02-20 00:21:26.000000000 -0800
@@ -11,6 +11,7 @@
#include
#include
#include
+#include <linux/syscalls.h>
#include
#include
#include
--- linux-2.6.3/arch/alpha/kernel/irq.c 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/alpha/kernel/irq.c 2004-02-20 00:19:59.000000000 -0800
@@ -252,7 +252,7 @@ static int
irq_affinity_read_proc (char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- int len = cpumask_snprintf(page, count, irq_affinity[(long)data]);
+ int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
if (count - len < 2)
return -EINVAL;
len += sprintf(page + len, "\n");
@@ -333,7 +333,7 @@ static int
prof_cpu_mask_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- int len = cpumask_snprintf(page, count, *(cpumask_t *)data);
+ int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
if (count - len < 2)
return -EINVAL;
len += sprintf(page + len, "\n");
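
For reference, a minimal sketch of why the cpumask_snprintf -> cpumask_scnprintf
rename (and the later snprintf -> scnprintf conversions in this series) matters.
The buffer and format string below are made-up illustrations, not kernel code:

    /*
     * snprintf() returns the length the output *would* have had if the
     * buffer were large enough, so the result can exceed the buffer size.
     * scnprintf() returns the number of characters actually stored
     * (excluding the trailing NUL), which is what callers need when they
     * use the return value to append more output after the string.
     */
    char buf[8];
    int a = snprintf(buf, sizeof(buf), "0123456789");   /* a == 10 */
    int b = scnprintf(buf, sizeof(buf), "0123456789");  /* b == 7  */
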
--- linux-2.6.3/arch/alpha/kernel/osf_sys.c 2003-08-08 22:55:10.000000000 -0700
+++ 25/arch/alpha/kernel/osf_sys.c 2004-02-20 00:21:23.000000000 -0800
@@ -17,6 +17,7 @@
#include
#include
#include
+#include <linux/syscalls.h>
#include
#include
#include
@@ -46,7 +47,6 @@
#include
extern int do_pipe(int *);
-extern asmlinkage unsigned long sys_brk(unsigned long);
/*
* Brk needs to return an error. Still support Linux's brk(0) query idiom,
@@ -821,7 +821,6 @@ osf_setsysinfo(unsigned long op, void *b
affects all sorts of things, like timeval and itimerval. */
extern struct timezone sys_tz;
-extern asmlinkage int sys_utimes(char *, struct timeval *);
extern int do_adjtimex(struct timex *);
struct timeval32
@@ -1315,8 +1314,6 @@ arch_get_unmapped_area(struct file *filp
}
#ifdef CONFIG_OSF4_COMPAT
-extern ssize_t sys_readv(unsigned long, const struct iovec *, unsigned long);
-extern ssize_t sys_writev(unsigned long, const struct iovec *, unsigned long);
/* Clear top 32 bits of iov_len in the user's buffer for
compatibility with old versions of OSF/1 where iov_len
--- linux-2.6.3/arch/alpha/kernel/time.c 2003-10-08 15:07:08.000000000 -0700
+++ 25/arch/alpha/kernel/time.c 2004-02-20 00:19:59.000000000 -0800
@@ -503,6 +503,7 @@ do_settimeofday(struct timespec *tv)
time_esterror = NTP_PHASE_LIMIT;
write_sequnlock_irq(&xtime_lock);
+ clock_was_set();
return 0;
}
--- linux-2.6.3/arch/arm26/Kconfig 2003-09-27 18:57:43.000000000 -0700
+++ 25/arch/arm26/Kconfig 2004-02-20 00:20:34.000000000 -0800
@@ -118,24 +118,6 @@ config XIP_KERNEL
Select this option to create a kernel that can be programed into
the OS ROMs.
-config HOTPLUG
- bool "Support for hot-pluggable devices"
- ---help---
- Say Y here if you want to plug devices into your computer while
- the system is running, and be able to use them quickly. In many
- cases, the devices can likewise be unplugged at any time too.
-
- One well known example of this is PCMCIA- or PC-cards, credit-card
- size devices such as network cards, modems or hard drives which are
- plugged into slots found on all modern laptop computers. Another
- example, used on modern desktops as well as laptops, is USB.
-
- Enable HOTPLUG and KMOD, and build a modular kernel. Get agent
- software (at <http://linux-hotplug.sourceforge.net/>) and install it.
- Then your kernel will automatically call out to a user mode "policy
- agent" (/sbin/hotplug) to load modules and set up software needed
- to use devices as you hotplug them.
-
comment "At least one math emulation must be selected"
config FPE_NWFPE
@@ -216,11 +198,6 @@ source "drivers/input/Kconfig"
source "drivers/char/Kconfig"
-config KBDMOUSE
- bool
- depends on ARCH_ACORN && BUSMOUSE=y
- default y
-
source "drivers/media/Kconfig"
source "fs/Kconfig"
--- linux-2.6.3/arch/arm26/kernel/armksyms.c 2003-06-14 12:18:07.000000000 -0700
+++ 25/arch/arm26/kernel/armksyms.c 2004-02-20 00:21:22.000000000 -0800
@@ -21,6 +21,7 @@
#include
#include
#include
+#include <linux/syscalls.h>
#include
#include
@@ -43,14 +44,6 @@ extern void outswb(unsigned int port, co
extern void __bad_xchg(volatile void *ptr, int size);
/*
- * syscalls
- */
-extern int sys_write(int, const char *, int);
-extern int sys_read(int, char *, int);
-extern int sys_lseek(int, off_t, int);
-extern int sys_exit(int);
-
-/*
* libgcc functions - functions that are used internally by the
* compiler... (prototypes are not correct though, but that
* doesn't really matter since they're not versioned).
--- linux-2.6.3/arch/arm26/kernel/sys_arm.c 2003-06-14 12:18:23.000000000 -0700
+++ 25/arch/arm26/kernel/sys_arm.c 2004-02-20 00:21:22.000000000 -0800
@@ -21,6 +21,7 @@
#include
#include
#include
+#include <linux/syscalls.h>
#include
#include
#include
@@ -138,7 +139,6 @@ out:
* Perform the select(nd, in, out, ex, tv) and mmap() system
* calls.
*/
-extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *);
struct sel_arg_struct {
unsigned long n;
--- linux-2.6.3/arch/arm26/kernel/time.c 2003-10-08 15:07:08.000000000 -0700
+++ 25/arch/arm26/kernel/time.c 2004-02-20 00:19:59.000000000 -0800
@@ -179,6 +179,7 @@ int do_settimeofday(struct timespec *tv)
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
write_sequnlock_irq(&xtime_lock);
+ clock_was_set();
return 0;
}
--- linux-2.6.3/arch/arm/Kconfig 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/arm/Kconfig 2004-02-20 00:20:34.000000000 -0800
@@ -365,24 +365,6 @@ endif
source "drivers/pci/Kconfig"
-config HOTPLUG
- bool "Support for hot-pluggable devices"
- ---help---
- Say Y here if you want to plug devices into your computer while
- the system is running, and be able to use them quickly. In many
- cases, the devices can likewise be unplugged at any time too.
-
- One well known example of this is PCMCIA- or PC-cards, credit-card
- size devices such as network cards, modems or hard drives which are
- plugged into slots found on all modern laptop computers. Another
- example, used on modern desktops as well as laptops, is USB.
-
- Enable HOTPLUG and KMOD, and build a modular kernel. Get agent
- software (at <http://linux-hotplug.sourceforge.net/>) and install it.
- Then your kernel will automatically call out to a user mode "policy
- agent" (/sbin/hotplug) to load modules and set up software needed
- to use devices as you hotplug them.
-
source "drivers/pcmcia/Kconfig"
comment "At least one math emulation must be selected"
@@ -639,6 +621,8 @@ source "drivers/media/Kconfig"
source "fs/Kconfig"
+source "arch/arm/oprofile/Kconfig"
+
source "drivers/video/Kconfig"
if ARCH_ACORN || ARCH_CLPS7500 || ARCH_TBOX || ARCH_SHARK || ARCH_SA1100 || PCI
--- linux-2.6.3/arch/arm/kernel/armksyms.c 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/arm/kernel/armksyms.c 2004-02-20 00:21:22.000000000 -0800
@@ -22,6 +22,7 @@
#include
#include
#include
+#include <linux/syscalls.h>
#include
#include
@@ -44,14 +45,6 @@ extern void outswb(unsigned int port, co
extern void __bad_xchg(volatile void *ptr, int size);
/*
- * syscalls
- */
-extern int sys_write(int, const char *, int);
-extern int sys_read(int, char *, int);
-extern int sys_lseek(int, off_t, int);
-extern int sys_exit(int);
-
-/*
* libgcc functions - functions that are used internally by the
* compiler... (prototypes are not correct though, but that
* doesn't really matter since they're not versioned).
--- linux-2.6.3/arch/arm/kernel/sys_arm.c 2003-06-14 12:17:59.000000000 -0700
+++ 25/arch/arm/kernel/sys_arm.c 2004-02-20 00:21:22.000000000 -0800
@@ -20,6 +20,7 @@
#include
#include
#include
+#include <linux/syscalls.h>
#include
#include
#include
@@ -137,7 +138,6 @@ out:
* Perform the select(nd, in, out, ex, tv) and mmap() system
* calls.
*/
-extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *);
struct sel_arg_struct {
unsigned long n;
--- linux-2.6.3/arch/arm/kernel/time.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/arm/kernel/time.c 2004-02-20 00:20:36.000000000 -0800
@@ -85,6 +85,9 @@ unsigned long long __attribute__((weak))
*/
static inline void do_profile(struct pt_regs *regs)
{
+
+ profile_hook(regs);
+
if (!user_mode(regs) &&
prof_buffer &&
current->pid) {
@@ -175,7 +178,7 @@ static int __init leds_init(void)
int ret;
ret = sysdev_class_register(&leds_sysclass);
if (ret == 0)
- ret = sys_device_register(&leds_device);
+ ret = sysdev_register(&leds_device);
return ret;
}
--- linux-2.6.3/arch/arm/mach-integrator/integrator_ap.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/arm/mach-integrator/integrator_ap.c 2004-02-20 00:20:36.000000000 -0800
@@ -173,7 +173,7 @@ static int __init irq_init_sysfs(void)
{
int ret = sysdev_class_register(&irq_class);
if (ret == 0)
- ret = sys_device_register(&irq_device);
+ ret = sysdev_register(&irq_device);
return ret;
}
--- linux-2.6.3/arch/arm/mach-sa1100/irq.c 2003-06-22 12:04:43.000000000 -0700
+++ 25/arch/arm/mach-sa1100/irq.c 2004-02-20 00:20:36.000000000 -0800
@@ -278,7 +278,7 @@ static struct sys_device sa1100irq_devic
static int __init sa1100irq_init_devicefs(void)
{
sysdev_class_register(&sa1100irq_sysclass);
- return sys_device_register(&sa1100irq_device);
+ return sysdev_register(&sa1100irq_device);
}
device_initcall(sa1100irq_init_devicefs);
--- linux-2.6.3/arch/arm/Makefile 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/arm/Makefile 2004-02-20 00:19:59.000000000 -0800
@@ -116,6 +116,7 @@ endif
core-$(CONFIG_FPE_NWFPE) += arch/arm/nwfpe/
core-$(CONFIG_FPE_FASTFPE) += $(FASTFPE_OBJ)
+drivers-$(CONFIG_OPROFILE) += arch/arm/oprofile/
drivers-$(CONFIG_ARCH_CLPS7500) += drivers/acorn/char/
drivers-$(CONFIG_ARCH_L7200) += drivers/acorn/char/
--- /dev/null 2002-08-30 16:31:37.000000000 -0700
+++ 25/arch/arm/oprofile/init.c 2004-02-20 00:19:59.000000000 -0800
@@ -0,0 +1,22 @@
+/**
+ * @file init.c
+ *
+ * @remark Copyright 2004 Oprofile Authors
+ *
+ * @author Zwane Mwaikambo
+ */
+
+#include
+#include
+#include
+
+int oprofile_arch_init(struct oprofile_operations **ops)
+{
+ int ret = -ENODEV;
+
+ return ret;
+}
+
+void oprofile_arch_exit(void)
+{
+}
--- /dev/null 2002-08-30 16:31:37.000000000 -0700
+++ 25/arch/arm/oprofile/Kconfig 2004-02-20 00:19:59.000000000 -0800
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+ depends on EXPERIMENTAL
+
+config PROFILING
+ bool "Profiling support (EXPERIMENTAL)"
+ help
+ Say Y here to enable the extended profiling support mechanisms used
+ by profilers such as OProfile.
+
+
+config OPROFILE
+ tristate "OProfile system profiling (EXPERIMENTAL)"
+ depends on PROFILING
+ help
+ OProfile is a profiling system capable of profiling the
+ whole system, including the kernel, kernel modules, libraries,
+ and applications.
+
+ If unsure, say N.
+
+endmenu
+
--- /dev/null 2002-08-30 16:31:37.000000000 -0700
+++ 25/arch/arm/oprofile/Makefile 2004-02-20 00:19:59.000000000 -0800
@@ -0,0 +1,9 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+ oprof.o cpu_buffer.o buffer_sync.o \
+ event_buffer.o oprofile_files.o \
+ oprofilefs.o oprofile_stats.o \
+ timer_int.o )
+
+oprofile-y := $(DRIVER_OBJS) init.o
--- linux-2.6.3/arch/cris/arch-v10/drivers/ethernet.c 2003-07-10 18:50:30.000000000 -0700
+++ 25/arch/cris/arch-v10/drivers/ethernet.c 2004-02-20 00:19:59.000000000 -0800
@@ -482,7 +482,7 @@ etrax_ethernet_init(void)
/* Register device */
err = register_netdev(dev);
if (err) {
- kfree(dev);
+ free_netdev(dev);
return err;
}
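
As a side note on the error path fixed above: a net_device obtained from
alloc_etherdev() carries driver-private data and internal state, so it must be
released with free_netdev() rather than a bare kfree(). A minimal sketch of the
pattern; the private struct name is illustrative only:

    struct net_device *dev;
    int err;

    dev = alloc_etherdev(sizeof(struct my_priv));   /* my_priv is hypothetical */
    if (!dev)
            return -ENOMEM;

    err = register_netdev(dev);
    if (err) {
            free_netdev(dev);       /* not kfree(dev) */
            return err;
    }
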
--- linux-2.6.3/arch/cris/kernel/sys_cris.c 2003-07-10 18:50:30.000000000 -0700
+++ 25/arch/cris/kernel/sys_cris.c 2004-02-20 00:21:25.000000000 -0800
@@ -11,6 +11,7 @@
#include
#include
+#include <linux/syscalls.h>
#include
#include
#include
--- linux-2.6.3/arch/cris/kernel/time.c 2003-10-08 15:07:08.000000000 -0700
+++ 25/arch/cris/kernel/time.c 2004-02-20 00:19:59.000000000 -0800
@@ -108,6 +108,7 @@ int do_settimeofday(struct timespec *tv)
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
local_irq_restore(flags);
+ clock_was_set();
return 0;
}
--- linux-2.6.3/arch/h8300/kernel/signal.c 2003-09-27 18:57:43.000000000 -0700
+++ 25/arch/h8300/kernel/signal.c 2004-02-20 00:21:23.000000000 -0800
@@ -28,6 +28,7 @@
#include
#include
#include
+#include <linux/syscalls.h>
#include
#include
#include
@@ -46,8 +47,6 @@
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-asmlinkage long sys_wait4(pid_t pid, unsigned int * stat_addr, int options,
- struct rusage * ru);
asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs);
/*
--- linux-2.6.3/arch/h8300/kernel/sys_h8300.c 2003-08-08 22:55:10.000000000 -0700
+++ 25/arch/h8300/kernel/sys_h8300.c 2004-02-20 00:21:28.000000000 -0800
@@ -15,6 +15,7 @@
#include
#include
#include
+#include <linux/syscalls.h>
#include
#include
#include
@@ -155,8 +156,6 @@ out:
}
#endif
-extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *);
-
struct sel_arg_struct {
unsigned long n;
fd_set *inp, *outp, *exp;
@@ -261,7 +260,7 @@ asmlinkage int sys_ipc (uint call, int f
return -EINVAL;
}
-asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on)
+asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on)
{
return -ENOSYS;
}
--- linux-2.6.3/arch/h8300/kernel/time.c 2003-11-23 19:03:00.000000000 -0800
+++ 25/arch/h8300/kernel/time.c 2004-02-20 00:19:59.000000000 -0800
@@ -139,6 +139,7 @@ int do_settimeofday(struct timespec *tv)
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
write_sequnlock_irq(&xtime_lock);
+ clock_was_set();
return 0;
}
--- linux-2.6.3/arch/i386/boot/Makefile 2003-09-27 18:57:43.000000000 -0700
+++ 25/arch/i386/boot/Makefile 2004-02-20 00:19:59.000000000 -0800
@@ -31,6 +31,8 @@ subdir- := compressed
host-progs := tools/build
+HOSTCFLAGS_build.o := -Iinclude
+
# ---------------------------------------------------------------------------
$(obj)/zImage: IMAGE_OFFSET := 0x1000
--- linux-2.6.3/arch/i386/boot/setup.S 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/boot/setup.S 2004-02-20 00:21:53.000000000 -0800
@@ -164,7 +164,7 @@ cmd_line_ptr: .long 0 # (Header versio
# can be located anywhere in
# low memory 0x10000 or higher.
-ramdisk_max: .long MAXMEM-1 # (Header version 0x0203 or later)
+ramdisk_max: .long __MAXMEM-1 # (Header version 0x0203 or later)
# The highest safe address for
# the contents of an initrd
@@ -776,7 +776,7 @@ end_move_self: # now we are at the r
# AMD Elan bug fix by Robert Schwebel.
#
-#if defined(CONFIG_MELAN)
+#if defined(CONFIG_X86_ELAN)
movb $0x02, %al # alternate A20 gate
outb %al, $0x92 # this works on SC410/SC520
a20_elan_wait:
--- linux-2.6.3/arch/i386/Kconfig 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/Kconfig 2004-02-20 00:21:53.000000000 -0800
@@ -43,6 +43,15 @@ config X86_PC
help
Choose this option if your computer is a standard PC or compatible.
+config X86_ELAN
+ bool "AMD Elan"
+ help
+ Select this for an AMD Elan processor.
+
+ Do not use this option for K6/Athlon/Opteron processors!
+
+ If unsure, choose "PC-compatible" instead.
+
config X86_VOYAGER
bool "Voyager (NCR)"
help
@@ -130,6 +139,8 @@ config ES7000_CLUSTERED_APIC
default y
depends on SMP && X86_ES7000 && MPENTIUMIII
+if !X86_ELAN
+
choice
prompt "Processor family"
default M686
@@ -222,14 +233,20 @@ config MPENTIUMIII
extended prefetch instructions in addition to the Pentium II
extensions.
+config MPENTIUMM
+ bool "Pentium M"
+ help
+ Select this for Intel Pentium M (not Pentium-4 M)
+ notebook chips.
+
config MPENTIUM4
- bool "Pentium-4/Celeron(P4-based)/Xeon"
+ bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon"
help
- Select this for Intel Pentium 4 chips. This includes both
- the Pentium 4 and P4-based Celeron chips. This option
- enables compile flags optimized for the chip, uses the
- correct cache shift, and applies any applicable Pentium III
- optimizations.
+ Select this for Intel Pentium 4 chips. This includes the
+ Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M
+ (not Pentium M) chips. This option enables compile flags
+ optimized for the chip, uses the correct cache shift, and
+ applies any applicable Pentium III optimizations.
config MK6
bool "K6/K6-II/K6-III"
@@ -312,6 +329,8 @@ config X86_GENERIC
when it has moderate overhead. This is intended for generic
distributions kernels.
+endif
+
#
# Define implied options from the CPU selection here
#
@@ -328,9 +347,9 @@ config X86_XADD
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || X86_GENERIC
- default "4" if MELAN || M486 || M386
+ default "4" if X86_ELAN || M486 || M386
default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2
- default "6" if MK7 || MK8
+ default "6" if MK7 || MK8 || MPENTIUMM
config RWSEM_GENERIC_SPINLOCK
bool
@@ -374,22 +393,22 @@ config X86_POPAD_OK
config X86_ALIGNMENT_16
bool
- depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2
+ depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2
default y
config X86_GOOD_APIC
bool
- depends on MK7 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8
+ depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8
default y
config X86_INTEL_USERCOPY
bool
- depends on MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7
+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7
default y
config X86_USE_PPRO_CHECKSUM
bool
- depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2
+ depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2
default y
config X86_USE_3DNOW
@@ -402,6 +421,54 @@ config X86_OOSTORE
depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
default y
+config X86_4G
+ bool "4 GB kernel-space and 4 GB user-space virtual memory support"
+ help
+ This option is only useful for systems that have more than 1 GB
+ of RAM.
+
+ The default kernel VM layout leaves 1 GB of virtual memory for
+ kernel-space mappings, and 3 GB of VM for user-space applications.
+ This option ups both the kernel-space VM and the user-space VM to
+ 4 GB.
+
+ The cost of this option is additional TLB flushes done at
+ system-entry points that transition from user-mode into kernel-mode,
+ i.e. system calls, page faults, and IRQs that interrupt user-mode
+ code. There's also additional overhead to kernel operations that copy
+ memory to/from user-space. The overhead from this is hard to tell and
+ depends on the workload - it can be anything from no visible overhead
+ to 20-30% overhead. A good rule of thumb is to expect a runtime
+ overhead of about 20%.
+
+ The upside is the much increased kernel-space VM, which more than
+ quadruples the maximum amount of RAM supported. Kernels compiled with
+ this option boot on 64GB of RAM and still have more than 3.1 GB of
+ 'lowmem' left. Another bonus is that highmem IO bouncing decreases,
+ if used with drivers that still use bounce-buffers.
+
+ There's also a 33% increase in user-space VM size - database
+ applications might see a boost from this.
+
+ But the cost of the TLB flushes and the runtime overhead has to be
+ weighed against the bonuses offered by the larger VM spaces. The
+ dividing line depends on the actual workload - there might be 4 GB
+ systems that benefit from this option. Systems with less than 4 GB
+ of RAM will rarely see a benefit from this option - but it's not
+ out of the question; the exact circumstances have to be considered.
+
+config X86_SWITCH_PAGETABLES
+ def_bool X86_4G
+
+config X86_4G_VM_LAYOUT
+ def_bool X86_4G
+
+config X86_UACCESS_INDIRECT
+ def_bool X86_4G
+
+config X86_HIGH_ENTRY
+ def_bool X86_4G
+
config HPET_TIMER
bool "HPET Timer Support"
help
@@ -459,6 +526,16 @@ config NR_CPUS
This is purely to save memory - each supported CPU adds
approximately eight kilobytes to the kernel image.
+config SCHED_SMT
+ bool "SMT (Hyperthreading) scheduler support"
+ depends on SMP
+ default n
+ help
+ SMT scheduler support improves the CPU scheduler's decision making
+ when dealing with Intel Pentium 4 chips with HyperThreading at a
+ cost of slightly increased overhead in some places. If unsure say
+ N here.
+
config PREEMPT
bool "Preemptible Kernel"
help
@@ -513,7 +590,7 @@ config X86_IO_APIC
config X86_TSC
bool
- depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ
+ depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ
default y
config X86_MCE
@@ -603,8 +680,6 @@ config MICROCODE
To compile this driver as a module, choose M here: the
module will be called microcode.
- If you use modprobe or kmod you may also want to add the line
- 'alias char-major-10-184 microcode' to your /etc/modules.conf file.
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
@@ -701,7 +776,7 @@ config X86_PAE
# Common NUMA Features
config NUMA
bool "Numa Memory Allocation Support"
- depends on SMP && HIGHMEM64G && (X86_PC || X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI))
+ depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI))
default n if X86_PC
default y if (X86_NUMAQ || X86_SUMMIT)
@@ -809,6 +884,14 @@ config EFI
anything about EFI). However, even with this option, the resultant
kernel should continue to boot on existing non-EFI platforms.
+config IRQBALANCE
+ bool "Enable kernel irq balancing"
+ depends on SMP
+ default y
+ help
+ The default yes will allow the kernel to do irq load balancing.
+ Saying no will keep the kernel from doing irq load balancing.
+
config HAVE_DEC_LOCK
bool
depends on (SMP || PREEMPT) && X86_CMPXCHG
@@ -821,6 +904,19 @@ config BOOT_IOREMAP
depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI))
default y
+config REGPARM
+ bool "Use register arguments (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ default n
+ help
+ Compile the kernel with -mregparm=3. This uses a different ABI
+ and passes the first three arguments of a function call in registers.
+ This will probably break binary only modules.
+
+ This feature is only enabled for gcc-3.0 and later - earlier compilers
+ generate incorrect output with certain kernel constructs when
+ -mregparm=3 is used.
+
endmenu
@@ -1030,12 +1126,16 @@ config PCI_GOBIOS
PCI-based systems don't have any BIOS at all. Linux can also try to
detect the PCI hardware directly without using the BIOS.
- With this option, you can specify how Linux should detect the PCI
- devices. If you choose "BIOS", the BIOS will be used, if you choose
- "Direct", the BIOS won't be used, and if you choose "Any", the
- kernel will try the direct access method and falls back to the BIOS
- if that doesn't work. If unsure, go with the default, which is
- "Any".
+ With this option, you can specify how Linux should detect the
+ PCI devices. If you choose "BIOS", the BIOS will be used,
+ if you choose "Direct", the BIOS won't be used, and if you
+ choose "MMConfig", then PCI Express MMCONFIG will be used.
+ If you choose "Any", the kernel will try MMCONFIG, then the
+ direct access method and fall back to the BIOS if that doesn't
+ work. If unsure, go with the default, which is "Any".
+
+config PCI_GOMMCONFIG
+ bool "MMConfig"
config PCI_GODIRECT
bool "Direct"
@@ -1055,6 +1155,12 @@ config PCI_DIRECT
depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
default y
+config PCI_MMCONFIG
+ bool
+ depends on PCI && (PCI_GOMMCONFIG || PCI_GOANY)
+ select ACPI_BOOT
+ default y
+
config PCI_USE_VECTOR
bool "Vector-based interrupt indexing"
depends on X86_LOCAL_APIC && X86_IO_APIC
@@ -1131,24 +1237,6 @@ config SCx200
This support is also available as a module. If compiled as a
module, it will be called scx200.
-config HOTPLUG
- bool "Support for hot-pluggable devices"
- ---help---
- Say Y here if you want to plug devices into your computer while
- the system is running, and be able to use them quickly. In many
- cases, the devices can likewise be unplugged at any time too.
-
- One well known example of this is PCMCIA- or PC-cards, credit-card
- size devices such as network cards, modems or hard drives which are
- plugged into slots found on all modern laptop computers. Another
- example, used on modern desktops as well as laptops, is USB.
-
- Enable HOTPLUG and KMOD, and build a modular kernel. Get agent
- software (at <http://linux-hotplug.sourceforge.net/>) and install it.
- Then your kernel will automatically call out to a user mode "policy
- agent" (/sbin/hotplug) to load modules and set up software needed
- to use devices as you hotplug them.
-
source "drivers/pcmcia/Kconfig"
source "drivers/pci/hotplug/Kconfig"
@@ -1177,10 +1265,32 @@ config DEBUG_KERNEL
Say Y here if you are developing drivers or trying to debug and
identify kernel problems.
+config EARLY_PRINTK
+ bool "Early printk" if EMBEDDED
+ default y
+ help
+ Write kernel log output directly into the VGA buffer or to a serial
+ port.
+
+ This is useful for kernel debugging when your machine crashes very
+ early before the console code is initialized. For normal operation
+ it is not recommended because it looks ugly and doesn't cooperate
+ with klogd/syslogd or the X server. You should normally say N here,
+ unless you want to debug such a crash.
+
config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
depends on DEBUG_KERNEL
+config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ depends on DEBUG_KERNEL
+ help
+ Enables the display of the minimum amount of free stack which each
+ task has ever had available in the sysrq-T and sysrq-P debug output.
+
+ This option will slow down process creation somewhat.
+
config DEBUG_SLAB
bool "Debug memory allocations"
depends on DEBUG_KERNEL
@@ -1231,6 +1341,15 @@ config DEBUG_PAGEALLOC
This results in a large slowdown, but helps to find certain types
of memory corruptions.
+config SPINLINE
+ bool "Spinlock inlining"
+ depends on DEBUG_KERNEL
+ help
+ This will change spinlocks from out of line to inline, making them
+ account cost to the callers in readprofile, rather than the lock
+ itself (as ".text.lock.filename"). This can be helpful for finding
+ the callers of locks.
+
config DEBUG_HIGHMEM
bool "Highmem debugging"
depends on DEBUG_KERNEL && HIGHMEM
@@ -1247,20 +1366,208 @@ config DEBUG_INFO
Say Y here only if you plan to use gdb to debug the kernel.
If you don't debug the kernel, you can say N.
+config LOCKMETER
+ bool "Kernel lock metering"
+ depends on SMP
+ help
+ Say Y to enable kernel lock metering, which adds overhead to SMP locks,
+ but allows you to see various statistics using the lockstat command.
+
config DEBUG_SPINLOCK_SLEEP
bool "Sleep-inside-spinlock checking"
help
If you say Y here, various routines which may sleep will become very
noisy if they are called with a spinlock held.
+config KGDB
+ bool "Include kgdb kernel debugger"
+ depends on DEBUG_KERNEL
+ help
+ If you say Y here, the system will be compiled with the debug
+ option (-g) and a debugging stub will be included in the
+ kernel. This stub communicates with gdb on another (host)
+ computer via a serial port. The host computer should have
+ access to the kernel binary file (vmlinux) and a serial port
+ that is connected to the target machine. Gdb can be made to
+ configure the serial port or you can use stty and setserial to
+ do this. See the 'target' command in gdb. This option also
+ configures in the ability to request a breakpoint early in the
+ boot process. To request the breakpoint just include 'kgdb'
+ as a boot option when booting the target machine. The system
+ will then break as soon as it looks at the boot options. This
+ option also installs a breakpoint in panic and sends any
+ kernel faults to the debugger. For more information see the
+ Documentation/i386/kgdb/kgdb.txt file.
+
+choice
+ depends on KGDB
+ prompt "Debug serial port BAUD"
+ default KGDB_115200BAUD
+ help
+ Gdb and the kernel stub need to agree on the baud rate to be
+ used. Some systems (x86 family at this writing) allow this to
+ be configured.
+
+config KGDB_9600BAUD
+ bool "9600"
+
+config KGDB_19200BAUD
+ bool "19200"
+
+config KGDB_38400BAUD
+ bool "38400"
+
+config KGDB_57600BAUD
+ bool "57600"
+
+config KGDB_115200BAUD
+ bool "115200"
+endchoice
+
+config KGDB_PORT
+ hex "hex I/O port address of the debug serial port"
+ depends on KGDB
+ default 3f8
+ help
+ Some systems (x86 family at this writing) allow the port
+ address to be configured. The number entered is assumed to be
+ hex, don't put 0x in front of it. The standard address are:
+ COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx
+ will tell you what you have. It is good to test the serial
+ connection with a live system before trying to debug.
+
+config KGDB_IRQ
+ int "IRQ of the debug serial port"
+ depends on KGDB
+ default 4
+ help
+ This is the irq for the debug port. If everything is working
+ correctly and the kernel has interrupts on, a control C to the
+ port should cause a break into the kernel debug stub.
+
+config DEBUG_INFO
+ bool
+ depends on KGDB
+ default y
+
+config KGDB_MORE
+ bool "Add any additional compile options"
+ depends on KGDB
+ default n
+ help
+ Saying yes here turns on the ability to enter additional
+ compile options.
+
+
+config KGDB_OPTIONS
+ depends on KGDB_MORE
+ string "Additional compile arguments"
+ default "-O1"
+ help
+ This option allows you to enter additional compile options for
+ the whole kernel compile. Each platform will have a default
+ that seems right for it. For example on PPC "-ggdb -O1", and
+ for i386 "-O1". Note that by configuring KGDB "-g" is already
+ turned on. In addition, on i386 platforms
+ "-fomit-frame-pointer" is deleted from the standard compile
+ options.
+
+config NO_KGDB_CPUS
+ int "Number of CPUs"
+ depends on KGDB && SMP
+ default NR_CPUS
+ help
+
+ This option sets the number of cpus for kgdb ONLY. It is used
+ to prune some internal structures so they look "nice" when
+ displayed with gdb. This is to overcome possibly larger
+ numbers that may have been entered above. Enter the real
+ number to get nice clean kgdb_info displays.
+
+config KGDB_TS
+ bool "Enable kgdb time stamp macros?"
+ depends on KGDB
+ default n
+ help
+ Kgdb event macros allow you to instrument your code with calls
+ to the kgdb event recording function. The event log may be
+ examined with gdb at a break point. Turning on this
+ capability also allows you to choose how many events to
+ keep. Kgdb always keeps the latest events.
+
+choice
+ depends on KGDB_TS
+ prompt "Max number of time stamps to save?"
+ default KGDB_TS_128
+
+config KGDB_TS_64
+ bool "64"
+
+config KGDB_TS_128
+ bool "128"
+
+config KGDB_TS_256
+ bool "256"
+
+config KGDB_TS_512
+ bool "512"
+
+config KGDB_TS_1024
+ bool "1024"
+
+endchoice
+
+config STACK_OVERFLOW_TEST
+ bool "Turn on kernel stack overflow testing?"
+ depends on KGDB
+ default n
+ help
+ This option enables code in the front line interrupt handlers
+ to check for kernel stack overflow on interrupts and system
+ calls. This is part of the kgdb code on x86 systems.
+
+config KGDB_CONSOLE
+ bool "Enable serial console thru kgdb port"
+ depends on KGDB
+ default n
+ help
+ This option enables the command line "console=kgdb" option.
+ When the system is booted with this option in the command line
+ all kernel printk output is sent to gdb (as well as to other
+ consoles). For this to work gdb must be connected. For this
+ reason, this command line option will generate a breakpoint if
+ gdb has not yet connected. After the gdb continue command is
+ given, all pent-up console output will be printed by gdb on the
+ host machine. Neither this option nor KGDB requires the
+ serial driver to be configured.
+
+config KGDB_SYSRQ
+ bool "Turn on SysRq 'G' command to do a break?"
+ depends on KGDB
+ default y
+ help
+ This option includes an option in the SysRq code that allows
+ you to enter SysRq G which generates a breakpoint to the KGDB
+ stub. This will work if the keyboard is alive and can
+ interrupt the system. Because of constraints on when the
+ serial port interrupt can be enabled, this code may allow you
+ to interrupt the system before the serial port control C is
+ available. Just say yes here.
+
config FRAME_POINTER
bool "Compile the kernel with frame pointers"
+ default KGDB
help
If you say Y here the resulting kernel image will be slightly larger
and slower, but it will give very useful debugging information.
If you don't debug the kernel, you can say N, but we may not be able
to solve problems without frame pointers.
+config MAGIC_SYSRQ
+ bool
+ depends on KGDB_SYSRQ
+ default y
+
config X86_FIND_SMP_CONFIG
bool
depends on X86_LOCAL_APIC || X86_VOYAGER
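
A brief aside on the REGPARM option introduced in the Kconfig hunk above,
sketched under the assumption that the usual i386 linkage annotations are in
play; the function names below are illustrative only:

    /*
     * With -mregparm=3, the first three integer/pointer arguments are
     * passed in %eax, %edx and %ecx instead of on the stack.  Code that
     * must keep the stack-based convention regardless (e.g. functions
     * called from assembly, such as system-call entry points) is marked
     * asmlinkage, which on i386 expands to __attribute__((regparm(0))).
     */
    asmlinkage long example_syscall(int a, int b, int c);  /* always stack args */
    static long example_helper(int a, int b, int c);       /* registers under REGPARM */
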
--- linux-2.6.3/arch/i386/kernel/acpi/boot.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/kernel/acpi/boot.c 2004-02-20 00:21:54.000000000 -0800
@@ -96,6 +96,31 @@ char *__acpi_map_table(unsigned long phy
}
+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_mcfg *mcfg;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+ if (!mcfg) {
+ printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+ return -ENODEV;
+ }
+
+ if (mcfg->base_reserved) {
+ printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
+ return -ENODEV;
+ }
+
+ pci_mmcfg_base_addr = mcfg->base_address;
+
+ return 0;
+}
+#endif /* CONFIG_PCI_MMCONFIG */
+
#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -339,7 +364,7 @@ acpi_scan_rsdp (
* RSDP signature.
*/
for (offset = 0; offset < length; offset += 16) {
- if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
+ if (strncmp((char *) __va(start + offset), "RSD PTR ", sig_len))
continue;
return (start + offset);
}
@@ -376,6 +401,37 @@ static int __init acpi_parse_hpet(unsign
}
#endif
+/* detect the location of the ACPI PM Timer */
+#ifdef CONFIG_X86_PM_TIMER
+extern u32 pmtmr_ioport;
+
+static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
+{
+ struct fadt_descriptor_rev2 *fadt =0;
+
+ fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
+ if(!fadt) {
+ printk(KERN_WARNING PREFIX "Unable to map FADT\n");
+ return 0;
+ }
+
+ if (fadt->revision >= FADT2_REVISION_ID) {
+ /* FADT rev. 2 */
+ if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO)
+ return 0;
+
+ pmtmr_ioport = fadt->xpm_tmr_blk.address;
+ } else {
+ /* FADT rev. 1 */
+ pmtmr_ioport = fadt->V1_pm_tmr_blk;
+ }
+ if (pmtmr_ioport)
+ printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport);
+ return 0;
+}
+#endif
+
+
unsigned long __init
acpi_find_rsdp (void)
{
@@ -398,55 +454,14 @@ acpi_find_rsdp (void)
return rsdp_phys;
}
-/*
- * acpi_boot_init()
- * called from setup_arch(), always.
- * 1. maps ACPI tables for later use
- * 2. enumerates lapics
- * 3. enumerates io-apics
- *
- * side effects:
- * acpi_lapic = 1 if LAPIC found
- * acpi_ioapic = 1 if IOAPIC found
- * if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
- * if acpi_blacklisted() acpi_disabled = 1;
- * acpi_irq_model=...
- * ...
- *
- * return value: (currently ignored)
- * 0: success
- * !0: failure
- */
-int __init
-acpi_boot_init (void)
+static int acpi_apic_setup(void)
{
- int result = 0;
-
- if (acpi_disabled && !acpi_ht)
- return 1;
+ int result;
- /*
- * The default interrupt routing model is PIC (8259). This gets
- * overriden if IOAPICs are enumerated (below).
- */
- acpi_irq_model = ACPI_IRQ_MODEL_PIC;
-
- /*
- * Initialize the ACPI boot-time table parser.
- */
- result = acpi_table_init();
- if (result) {
- acpi_disabled = 1;
- return result;
- }
-
- result = acpi_blacklisted();
- if (result) {
- printk(KERN_WARNING PREFIX "BIOS listed in blacklist, disabling ACPI support\n");
- acpi_disabled = 1;
- return result;
- }
+#ifdef CONFIG_X86_PM_TIMER
+ acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
+#endif
#ifdef CONFIG_X86_LOCAL_APIC
@@ -506,24 +521,17 @@ acpi_boot_init (void)
acpi_lapic = 1;
-#endif /*CONFIG_X86_LOCAL_APIC*/
+#endif /* CONFIG_X86_LOCAL_APIC */
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
/*
* I/O APIC
- * --------
*/
- /*
- * ACPI interpreter is required to complete interrupt setup,
- * so if it is off, don't enumerate the io-apics with ACPI.
- * If MPS is present, it will handle them,
- * otherwise the system will stay in PIC mode
- */
- if (acpi_disabled || acpi_noirq) {
+ if (acpi_noirq) {
return 1;
- }
+ }
/*
* if "noapic" boot option, don't look for IO-APICs
@@ -538,8 +546,7 @@ acpi_boot_init (void)
if (!result) {
printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
return -ENODEV;
- }
- else if (result < 0) {
+ } else if (result < 0) {
printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
return result;
}
@@ -576,9 +583,82 @@ acpi_boot_init (void)
}
#endif
+ return 0;
+}
+
+/*
+ * acpi_boot_init()
+ * called from setup_arch(), always.
+ * 1. maps ACPI tables for later use
+ * 2. enumerates lapics
+ * 3. enumerates io-apics
+ *
+ * side effects:
+ * acpi_lapic = 1 if LAPIC found
+ * acpi_ioapic = 1 if IOAPIC found
+ * if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
+ * if acpi_blacklisted() acpi_disabled = 1;
+ * acpi_irq_model=...
+ * ...
+ *
+ * return value: (currently ignored)
+ * 0: success
+ * !0: failure
+ */
+
+int __init
+acpi_boot_init (void)
+{
+ int result, error;
+
+ if (acpi_disabled && !acpi_ht)
+ return 1;
+
+ /*
+ * The default interrupt routing model is PIC (8259). This gets
+ * overriden if IOAPICs are enumerated (below).
+ */
+ acpi_irq_model = ACPI_IRQ_MODEL_PIC;
+
+ /*
+ * Initialize the ACPI boot-time table parser.
+ */
+ result = acpi_table_init();
+ if (result) {
+ acpi_disabled = 1;
+ return result;
+ }
+
+ result = acpi_blacklisted();
+ if (result) {
+ printk(KERN_WARNING PREFIX "BIOS listed in blacklist, disabling ACPI support\n");
+ acpi_disabled = 1;
+ return result;
+ }
+
+ error = acpi_apic_setup();
+
+#ifdef CONFIG_PCI_MMCONFIG
+ result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+ if (result < 0) {
+ printk(KERN_ERR PREFIX "Error %d parsing MCFG\n", result);
+ if (!error)
+ error = result;
+ } else if (result > 1) {
+ printk(KERN_WARNING PREFIX "Multiple MCFG tables exist\n");
+ }
+#endif /* CONFIG_PCI_MMCONFIG */
+
#ifdef CONFIG_HPET_TIMER
- acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
+ result = acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
+ if (result < 0) {
+ printk(KERN_ERR PREFIX "Error %d parsing HPET\n", result);
+ if (!error)
+ error = result;
+ } else if (result > 1) {
+ printk(KERN_WARNING PREFIX "Multiple HPET tables exist\n");
+ }
#endif
- return 0;
+ return error;
}
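
A hedged sketch of how the pmtmr_ioport value recorded by acpi_parse_fadt()
above is typically consumed; the actual reader lives in the ACPI PM-timer clock
code, and the helper name here is illustrative:

    /*
     * The ACPI PM timer is a free-running counter clocked at 3.579545 MHz.
     * It is either 24 or 32 bits wide; masking to 24 bits gives uniform
     * wrap-around behaviour for both variants.
     */
    static inline u32 read_pmtmr(void)
    {
            return inl(pmtmr_ioport) & 0x00ffffff;
    }
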
--- linux-2.6.3/arch/i386/kernel/apic.c 2004-02-03 20:42:34.000000000 -0800
+++ 25/arch/i386/kernel/apic.c 2004-02-20 00:20:36.000000000 -0800
@@ -595,7 +595,7 @@ static int __init init_lapic_sysfs(void)
error = sysdev_class_register(&lapic_sysclass);
if (!error)
- error = sys_device_register(&device_lapic);
+ error = sysdev_register(&device_lapic);
return error;
}
device_initcall(init_lapic_sysfs);
--- linux-2.6.3/arch/i386/kernel/asm-offsets.c 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/kernel/asm-offsets.c 2004-02-20 00:21:53.000000000 -0800
@@ -4,9 +4,11 @@
* to extract and format the required data.
*/
+#include
#include
#include
#include "sigframe.h"
+#include
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -28,4 +30,17 @@ void foo(void)
DEFINE(RT_SIGFRAME_sigcontext,
offsetof (struct rt_sigframe, uc.uc_mcontext));
+ DEFINE(TI_task, offsetof (struct thread_info, task));
+ DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain));
+ DEFINE(TI_flags, offsetof (struct thread_info, flags));
+ DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count));
+ DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit));
+ DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack));
+ DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack));
+ DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd));
+
+ DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr, __fix_to_virt(FIX_ENTRY_TRAMPOLINE_0));
+ DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL));
+ DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
+ DEFINE(task_thread_db7, offsetof (struct task_struct, thread.debugreg[7]));
}
--- linux-2.6.3/arch/i386/kernel/cpu/centaur.c 2003-06-14 12:18:24.000000000 -0700
+++ 25/arch/i386/kernel/cpu/centaur.c 2004-02-20 00:21:16.000000000 -0800
@@ -246,7 +246,15 @@ static void __init winchip2_protect_mcr(
lo&=~0x1C0; /* blank bits 8-6 */
wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
}
-#endif
+#endif /* CONFIG_X86_OOSTORE */
+
+#define ACE_PRESENT (1 << 6)
+#define ACE_ENABLED (1 << 7)
+#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */
+
+#define RNG_PRESENT (1 << 2)
+#define RNG_ENABLED (1 << 3)
+#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
static void __init init_c3(struct cpuinfo_x86 *c)
{
@@ -254,6 +262,24 @@ static void __init init_c3(struct cpuinf
/* Test for Centaur Extended Feature Flags presence */
if (cpuid_eax(0xC0000000) >= 0xC0000001) {
+ u32 tmp = cpuid_edx(0xC0000001);
+
+ /* enable ACE unit, if present and disabled */
+ if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
+ rdmsr (MSR_VIA_FCR, lo, hi);
+ lo |= ACE_FCR; /* enable ACE unit */
+ wrmsr (MSR_VIA_FCR, lo, hi);
+ printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
+ }
+
+ /* enable RNG unit, if present and disabled */
+ if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
+ rdmsr (MSR_VIA_RNG, lo, hi);
+ lo |= RNG_ENABLE; /* enable RNG unit */
+ wrmsr (MSR_VIA_RNG, lo, hi);
+ printk(KERN_INFO "CPU: Enabled h/w RNG\n");
+ }
+
/* store Centaur Extended Feature Flags as
* word 5 of the CPU capability bit array
*/
--- linux-2.6.3/arch/i386/kernel/cpu/common.c 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/i386/kernel/cpu/common.c 2004-02-20 00:21:53.000000000 -0800
@@ -514,12 +514,16 @@ void __init cpu_init (void)
set_tss_desc(cpu,t);
cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc();
- load_LDT(&init_mm.context);
+ if (cpu)
+ load_LDT(&init_mm.context);
/* Set up doublefault TSS pointer in the GDT */
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff;
+ if (cpu)
+ trap_init_virtual_GDT();
+
/* Clear %fs and %gs. */
asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
--- linux-2.6.3/arch/i386/kernel/cpu/cpufreq/Kconfig 2004-02-03 20:42:34.000000000 -0800
+++ 25/arch/i386/kernel/cpu/cpufreq/Kconfig 2004-02-20 00:19:59.000000000 -0800
@@ -54,7 +54,7 @@ config X86_ACPI_CPUFREQ_PROC_INTF
config ELAN_CPUFREQ
tristate "AMD Elan"
- depends on CPU_FREQ_TABLE && MELAN
+ depends on CPU_FREQ_TABLE && X86_ELAN
---help---
This adds the CPUFreq driver for AMD Elan SC400 and SC410
processors.
--- linux-2.6.3/arch/i386/kernel/cpu/cpufreq/longhaul.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/kernel/cpu/cpufreq/longhaul.c 2004-02-20 00:19:59.000000000 -0800
@@ -1,5 +1,5 @@
/*
- * (C) 2001-2003 Dave Jones.
+ * (C) 2001-2004 Dave Jones.
* (C) 2002 Padraig Brady.
*
* Licensed under the terms of the GNU GPL License version 2.
@@ -186,6 +186,7 @@ static int _guess (int guess, int maxmul
return target;
}
+
static int guess_fsb(int maxmult)
{
int speed = (cpu_khz/1000);
@@ -203,7 +204,6 @@ static int guess_fsb(int maxmult)
}
-
static int __init longhaul_get_ranges (void)
{
struct cpuinfo_x86 *c = cpu_data;
@@ -359,7 +359,7 @@ static int longhaul_target (struct cpufr
return 0;
}
-static int longhaul_cpu_init (struct cpufreq_policy *policy)
+static int __init longhaul_cpu_init (struct cpufreq_policy *policy)
{
struct cpuinfo_x86 *c = cpu_data;
char *cpuname=NULL;
--- linux-2.6.3/arch/i386/kernel/cpu/cpufreq/longrun.c 2003-09-08 13:58:55.000000000 -0700
+++ 25/arch/i386/kernel/cpu/cpufreq/longrun.c 2004-02-20 00:19:59.000000000 -0800
@@ -220,7 +220,7 @@ static unsigned int __init longrun_deter
}
-static int longrun_cpu_init(struct cpufreq_policy *policy)
+static int __init longrun_cpu_init(struct cpufreq_policy *policy)
{
int result = 0;
--- linux-2.6.3/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-02-20 00:20:52.000000000 -0800
@@ -57,8 +57,7 @@ static int cpufreq_p4_setdc(unsigned int
u32 l, h;
cpumask_t cpus_allowed, affected_cpu_map;
struct cpufreq_freqs freqs;
- int hyperthreading = 0;
- int sibling = 0;
+ int j;
if (!cpu_online(cpu) || (newstate > DC_DISABLE) ||
(newstate == DC_RESV))
@@ -68,13 +67,10 @@ static int cpufreq_p4_setdc(unsigned int
cpus_allowed = current->cpus_allowed;
/* only run on CPU to be set, or on its sibling */
- affected_cpu_map = cpumask_of_cpu(cpu);
-#ifdef CONFIG_X86_HT
- hyperthreading = ((cpu_has_ht) && (smp_num_siblings == 2));
- if (hyperthreading) {
- sibling = cpu_sibling_map[cpu];
- cpu_set(sibling, affected_cpu_map);
- }
+#ifdef CONFIG_SMP
+ affected_cpu_map = cpu_sibling_map[cpu];
+#else
+ affected_cpu_map = cpumask_of_cpu(cpu);
#endif
set_cpus_allowed(current, affected_cpu_map);
BUG_ON(!cpu_isset(smp_processor_id(), affected_cpu_map));
@@ -97,11 +93,11 @@ static int cpufreq_p4_setdc(unsigned int
/* notifiers */
freqs.old = stock_freq * l / 8;
freqs.new = stock_freq * newstate / 8;
- freqs.cpu = cpu;
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
- if (hyperthreading) {
- freqs.cpu = sibling;
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ for_each_cpu(j) {
+ if (cpu_isset(j, affected_cpu_map)) {
+ freqs.cpu = j;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ }
}
rdmsr(MSR_IA32_THERM_STATUS, l, h);
@@ -132,10 +128,11 @@ static int cpufreq_p4_setdc(unsigned int
set_cpus_allowed(current, cpus_allowed);
/* notifiers */
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
- if (hyperthreading) {
- freqs.cpu = cpu;
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ for_each_cpu(j) {
+ if (cpu_isset(j, affected_cpu_map)) {
+ freqs.cpu = j;
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
}
return 0;
--- linux-2.6.3/arch/i386/kernel/cpu/cpufreq/powernow-k7.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/kernel/cpu/cpufreq/powernow-k7.c 2004-02-20 00:19:59.000000000 -0800
@@ -1,7 +1,7 @@
/*
* AMD K7 Powernow driver.
* (C) 2003 Dave Jones on behalf of SuSE Labs.
- * (C) 2003 Dave Jones
+ * (C) 2003-2004 Dave Jones
*
* Licensed under the terms of the GNU GPL License version 2.
* Based upon datasheets & sample CPUs kindly provided by AMD.
--- linux-2.6.3/arch/i386/kernel/cpu/intel.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/kernel/cpu/intel.c 2004-02-20 00:21:53.000000000 -0800
@@ -10,6 +10,7 @@
#include
#include
#include
+#include
#include "cpu.h"
@@ -19,8 +20,6 @@
#include
#endif
-extern int trap_init_f00f_bug(void);
-
#ifdef CONFIG_X86_INTEL_USERCOPY
/*
* Alignment at which movsl is preferred for bulk memory copies.
@@ -165,7 +164,7 @@ static void __init init_intel(struct cpu
c->f00f_bug = 1;
if ( !f00f_workaround_enabled ) {
- trap_init_f00f_bug();
+ trap_init_virtual_IDT();
printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
f00f_workaround_enabled = 1;
}
@@ -248,6 +247,12 @@ static void __init init_intel(struct cpu
/* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
clear_bit(X86_FEATURE_SEP, c->x86_capability);
+ /*
+ * FIXME: SEP is disabled for 4G/4G for now:
+ */
+#ifdef CONFIG_X86_HIGH_ENTRY
+ clear_bit(X86_FEATURE_SEP, c->x86_capability);
+#endif
/* Names for the Pentium II/Celeron processors
detectable only by also checking the cache size.
--- linux-2.6.3/arch/i386/kernel/cpu/mcheck/non-fatal.c 2004-02-03 20:42:34.000000000 -0800
+++ 25/arch/i386/kernel/cpu/mcheck/non-fatal.c 2004-02-20 00:19:59.000000000 -0800
@@ -24,8 +24,6 @@
#include "mce.h"
-static struct timer_list mce_timer;
-static int timerset;
static int firstbank;
#define MCE_RATE 15*HZ /* timer rate is 15s */
@@ -35,14 +33,15 @@ static void mce_checkregs (void *info)
u32 low, high;
int i;
- preempt_disable();
for (i=firstbank; i 1)
- schedule_work (&mce_work);
-#endif
- mce_timer.expires = jiffies + MCE_RATE;
- add_timer (&mce_timer);
-}
-
static int __init init_nonfatal_mce_checker(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -91,17 +80,11 @@ static int __init init_nonfatal_mce_chec
else
firstbank = 0;
- if (timerset == 0) {
- /* Set the timer to check for non-fatal
- errors every MCE_RATE seconds */
- init_timer (&mce_timer);
- mce_timer.expires = jiffies + MCE_RATE;
- mce_timer.data = 0;
- mce_timer.function = &mce_timerfunc;
- add_timer (&mce_timer);
- timerset = 1;
- printk(KERN_INFO "Machine check exception polling timer started.\n");
- }
+ /*
+ * Check for non-fatal errors every MCE_RATE s
+ */
+ schedule_delayed_work(&mce_work, MCE_RATE);
+ printk(KERN_INFO "Machine check exception polling timer started.\n");
return 0;
}
module_init(init_nonfatal_mce_checker);
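
The hunks above convert the non-fatal MCE poller from a self-armed timer to the
workqueue API. A minimal sketch of that self-rearming delayed-work pattern in
the 2.6 interface; names are illustrative, not the exact ones in non-fatal.c:

    #include <linux/init.h>
    #include <linux/workqueue.h>

    #define POLL_RATE (15 * HZ)

    static void poll_fn(void *data);
    static DECLARE_WORK(poll_work, poll_fn, NULL);

    /* Runs in keventd (process) context, so it may sleep, unlike a timer
     * callback; it re-arms itself for the next interval. */
    static void poll_fn(void *data)
    {
            /* ... poll the machine-check banks on each CPU ... */
            schedule_delayed_work(&poll_work, POLL_RATE);
    }

    static int __init poller_init(void)
    {
            schedule_delayed_work(&poll_work, POLL_RATE);
            return 0;
    }
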
--- linux-2.6.3/arch/i386/kernel/cpu/mtrr/generic.c 2003-08-22 19:23:40.000000000 -0700
+++ 25/arch/i386/kernel/cpu/mtrr/generic.c 2004-02-20 00:21:46.000000000 -0800
@@ -45,7 +45,7 @@ get_fixed_ranges(mtrr_type * frs)
}
/* Grab all of the MTRR state for this CPU into *state */
-void get_mtrr_state(void)
+void __init get_mtrr_state(void)
{
unsigned int i;
struct mtrr_var_range *vrs;
@@ -142,7 +142,7 @@ void generic_get_mtrr(unsigned int reg,
*type = base_lo & 0xff;
}
-static int __init set_fixed_ranges(mtrr_type * frs)
+static int set_fixed_ranges(mtrr_type * frs)
{
unsigned int *p = (unsigned int *) frs;
int changed = FALSE;
@@ -177,7 +177,7 @@ static int __init set_fixed_ranges(mtrr_
/* Set the MSR pair relating to a var range. Returns TRUE if
changes are made */
-static int __init set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
+static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
{
unsigned int lo, hi;
int changed = FALSE;
--- linux-2.6.3/arch/i386/kernel/cpu/mtrr/main.c 2003-09-08 13:58:55.000000000 -0700
+++ 25/arch/i386/kernel/cpu/mtrr/main.c 2004-02-20 00:21:46.000000000 -0800
@@ -111,7 +111,7 @@ void __init set_num_var_ranges(void)
num_var_ranges = config & 0xff;
}
-static void init_table(void)
+static void __init init_table(void)
{
int i, max;
@@ -541,7 +541,7 @@ static void __init init_ifs(void)
centaur_init_mtrr();
}
-static void init_other_cpus(void)
+static void __init init_other_cpus(void)
{
if (use_intel())
get_mtrr_state();
@@ -608,7 +608,7 @@ static struct sysdev_driver mtrr_sysdev_
/**
- * mtrr_init - initialie mtrrs on the boot CPU
+ * mtrr_init - initialize mtrrs on the boot CPU
*
* This needs to be called early; before any of the other CPUs are
* initialized (i.e. before smp_init()).
@@ -618,7 +618,7 @@ static int __init mtrr_init(void)
{
init_ifs();
- if ( cpu_has_mtrr ) {
+ if (cpu_has_mtrr) {
mtrr_if = &generic_mtrr_ops;
size_or_mask = 0xff000000; /* 36 bits */
size_and_mask = 0x00f00000;
@@ -660,7 +660,7 @@ static int __init mtrr_init(void)
} else {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
- if ( cpu_has_k6_mtrr ) {
+ if (cpu_has_k6_mtrr) {
/* Pre-Athlon (K6) AMD CPU MTRRs */
mtrr_if = mtrr_ops[X86_VENDOR_AMD];
size_or_mask = 0xfff00000; /* 32 bits */
@@ -668,14 +668,14 @@ static int __init mtrr_init(void)
}
break;
case X86_VENDOR_CENTAUR:
- if ( cpu_has_centaur_mcr ) {
+ if (cpu_has_centaur_mcr) {
mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
size_or_mask = 0xfff00000; /* 32 bits */
size_and_mask = 0;
}
break;
case X86_VENDOR_CYRIX:
- if ( cpu_has_cyrix_arr ) {
+ if (cpu_has_cyrix_arr) {
mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
size_or_mask = 0xfff00000; /* 32 bits */
size_and_mask = 0;
--- linux-2.6.3/arch/i386/kernel/cpu/proc.c 2003-08-22 19:23:40.000000000 -0700
+++ 25/arch/i386/kernel/cpu/proc.c 2004-02-20 00:21:16.000000000 -0800
@@ -50,7 +50,7 @@ static int show_cpuinfo(struct seq_file
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* VIA/Cyrix/Centaur-defined */
- NULL, NULL, "xstore", NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
--- linux-2.6.3/arch/i386/kernel/doublefault.c 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/kernel/doublefault.c 2004-02-20 00:21:53.000000000 -0800
@@ -7,12 +7,13 @@
#include
#include
#include
+#include
#define DOUBLEFAULT_STACKSIZE (1024)
static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
-#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000)
+#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START))
static void doublefault_fn(void)
{
@@ -38,8 +39,8 @@ static void doublefault_fn(void)
printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
t->eax, t->ebx, t->ecx, t->edx);
- printk("esi = %08lx, edi = %08lx\n",
- t->esi, t->edi);
+ printk("esi = %08lx, edi = %08lx, ebp = %08lx\n",
+ t->esi, t->edi, t->ebp);
}
}
--- /dev/null 2002-08-30 16:31:37.000000000 -0700
+++ 25/arch/i386/kernel/early_printk.c 2004-02-20 00:19:59.000000000 -0800
@@ -0,0 +1,2 @@
+
+#include "../../x86_64/kernel/early_printk.c"
--- linux-2.6.3/arch/i386/kernel/edd.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/kernel/edd.c 2004-02-20 00:19:59.000000000 -0800
@@ -134,18 +134,18 @@ edd_show_host_bus(struct edd_device *ede
for (i = 0; i < 4; i++) {
if (isprint(info->params.host_bus_type[i])) {
- p += snprintf(p, left, "%c", info->params.host_bus_type[i]);
+ p += scnprintf(p, left, "%c", info->params.host_bus_type[i]);
} else {
- p += snprintf(p, left, " ");
+ p += scnprintf(p, left, " ");
}
}
if (!strncmp(info->params.host_bus_type, "ISA", 3)) {
- p += snprintf(p, left, "\tbase_address: %x\n",
+ p += scnprintf(p, left, "\tbase_address: %x\n",
info->params.interface_path.isa.base_address);
} else if (!strncmp(info->params.host_bus_type, "PCIX", 4) ||
!strncmp(info->params.host_bus_type, "PCI", 3)) {
- p += snprintf(p, left,
+ p += scnprintf(p, left,
"\t%02x:%02x.%d channel: %u\n",
info->params.interface_path.pci.bus,
info->params.interface_path.pci.slot,
@@ -154,12 +154,12 @@ edd_show_host_bus(struct edd_device *ede
} else if (!strncmp(info->params.host_bus_type, "IBND", 4) ||
!strncmp(info->params.host_bus_type, "XPRS", 4) ||
!strncmp(info->params.host_bus_type, "HTPT", 4)) {
- p += snprintf(p, left,
+ p += scnprintf(p, left,
"\tTBD: %llx\n",
info->params.interface_path.ibnd.reserved);
} else {
- p += snprintf(p, left, "\tunknown: %llx\n",
+ p += scnprintf(p, left, "\tunknown: %llx\n",
info->params.interface_path.unknown.reserved);
}
return (p - buf);
@@ -178,43 +178,43 @@ edd_show_interface(struct edd_device *ed
for (i = 0; i < 8; i++) {
if (isprint(info->params.interface_type[i])) {
- p += snprintf(p, left, "%c", info->params.interface_type[i]);
+ p += scnprintf(p, left, "%c", info->params.interface_type[i]);
} else {
- p += snprintf(p, left, " ");
+ p += scnprintf(p, left, " ");
}
}
if (!strncmp(info->params.interface_type, "ATAPI", 5)) {
- p += snprintf(p, left, "\tdevice: %u lun: %u\n",
+ p += scnprintf(p, left, "\tdevice: %u lun: %u\n",
info->params.device_path.atapi.device,
info->params.device_path.atapi.lun);
} else if (!strncmp(info->params.interface_type, "ATA", 3)) {
- p += snprintf(p, left, "\tdevice: %u\n",
+ p += scnprintf(p, left, "\tdevice: %u\n",
info->params.device_path.ata.device);
} else if (!strncmp(info->params.interface_type, "SCSI", 4)) {
- p += snprintf(p, left, "\tid: %u lun: %llu\n",
+ p += scnprintf(p, left, "\tid: %u lun: %llu\n",
info->params.device_path.scsi.id,
info->params.device_path.scsi.lun);
} else if (!strncmp(info->params.interface_type, "USB", 3)) {
- p += snprintf(p, left, "\tserial_number: %llx\n",
+ p += scnprintf(p, left, "\tserial_number: %llx\n",
info->params.device_path.usb.serial_number);
} else if (!strncmp(info->params.interface_type, "1394", 4)) {
- p += snprintf(p, left, "\teui: %llx\n",
+ p += scnprintf(p, left, "\teui: %llx\n",
info->params.device_path.i1394.eui);
} else if (!strncmp(info->params.interface_type, "FIBRE", 5)) {
- p += snprintf(p, left, "\twwid: %llx lun: %llx\n",
+ p += scnprintf(p, left, "\twwid: %llx lun: %llx\n",
info->params.device_path.fibre.wwid,
info->params.device_path.fibre.lun);
} else if (!strncmp(info->params.interface_type, "I2O", 3)) {
- p += snprintf(p, left, "\tidentity_tag: %llx\n",
+ p += scnprintf(p, left, "\tidentity_tag: %llx\n",
info->params.device_path.i2o.identity_tag);
} else if (!strncmp(info->params.interface_type, "RAID", 4)) {
- p += snprintf(p, left, "\tidentity_tag: %x\n",
+ p += scnprintf(p, left, "\tidentity_tag: %x\n",
info->params.device_path.raid.array_number);
} else if (!strncmp(info->params.interface_type, "SATA", 4)) {
- p += snprintf(p, left, "\tdevice: %u\n",
+ p += scnprintf(p, left, "\tdevice: %u\n",
info->params.device_path.sata.device);
} else {
- p += snprintf(p, left, "\tunknown: %llx %llx\n",
+ p += scnprintf(p, left, "\tunknown: %llx %llx\n",
info->params.device_path.unknown.reserved1,
info->params.device_path.unknown.reserved2);
}
@@ -256,7 +256,7 @@ edd_show_version(struct edd_device *edev
return -EINVAL;
}
- p += snprintf(p, left, "0x%02x\n", info->version);
+ p += scnprintf(p, left, "0x%02x\n", info->version);
return (p - buf);
}
@@ -264,7 +264,7 @@ static ssize_t
edd_show_disk80_sig(struct edd_device *edev, char *buf)
{
char *p = buf;
- p += snprintf(p, left, "0x%08x\n", edd_disk80_sig);
+ p += scnprintf(p, left, "0x%08x\n", edd_disk80_sig);
return (p - buf);
}
@@ -278,16 +278,16 @@ edd_show_extensions(struct edd_device *e
}
if (info->interface_support & EDD_EXT_FIXED_DISK_ACCESS) {
- p += snprintf(p, left, "Fixed disk access\n");
+ p += scnprintf(p, left, "Fixed disk access\n");
}
if (info->interface_support & EDD_EXT_DEVICE_LOCKING_AND_EJECTING) {
- p += snprintf(p, left, "Device locking and ejecting\n");
+ p += scnprintf(p, left, "Device locking and ejecting\n");
}
if (info->interface_support & EDD_EXT_ENHANCED_DISK_DRIVE_SUPPORT) {
- p += snprintf(p, left, "Enhanced Disk Drive support\n");
+ p += scnprintf(p, left, "Enhanced Disk Drive support\n");
}
if (info->interface_support & EDD_EXT_64BIT_EXTENSIONS) {
- p += snprintf(p, left, "64-bit extensions\n");
+ p += scnprintf(p, left, "64-bit extensions\n");
}
return (p - buf);
}
@@ -302,21 +302,21 @@ edd_show_info_flags(struct edd_device *e
}
if (info->params.info_flags & EDD_INFO_DMA_BOUNDARY_ERROR_TRANSPARENT)
- p += snprintf(p, left, "DMA boundary error transparent\n");
+ p += scnprintf(p, left, "DMA boundary error transparent\n");
if (info->params.info_flags & EDD_INFO_GEOMETRY_VALID)
- p += snprintf(p, left, "geometry valid\n");
+ p += scnprintf(p, left, "geometry valid\n");
if (info->params.info_flags & EDD_INFO_REMOVABLE)
- p += snprintf(p, left, "removable\n");
+ p += scnprintf(p, left, "removable\n");
if (info->params.info_flags & EDD_INFO_WRITE_VERIFY)
- p += snprintf(p, left, "write verify\n");
+ p += scnprintf(p, left, "write verify\n");
if (info->params.info_flags & EDD_INFO_MEDIA_CHANGE_NOTIFICATION)
- p += snprintf(p, left, "media change notification\n");
+ p += scnprintf(p, left, "media change notification\n");
if (info->params.info_flags & EDD_INFO_LOCKABLE)
- p += snprintf(p, left, "lockable\n");
+ p += scnprintf(p, left, "lockable\n");
if (info->params.info_flags & EDD_INFO_NO_MEDIA_PRESENT)
- p += snprintf(p, left, "no media present\n");
+ p += scnprintf(p, left, "no media present\n");
if (info->params.info_flags & EDD_INFO_USE_INT13_FN50)
- p += snprintf(p, left, "use int13 fn50\n");
+ p += scnprintf(p, left, "use int13 fn50\n");
return (p - buf);
}
@@ -329,7 +329,7 @@ edd_show_default_cylinders(struct edd_de
return -EINVAL;
}
- p += snprintf(p, left, "0x%x\n", info->params.num_default_cylinders);
+ p += scnprintf(p, left, "0x%x\n", info->params.num_default_cylinders);
return (p - buf);
}
@@ -342,7 +342,7 @@ edd_show_default_heads(struct edd_device
return -EINVAL;
}
- p += snprintf(p, left, "0x%x\n", info->params.num_default_heads);
+ p += scnprintf(p, left, "0x%x\n", info->params.num_default_heads);
return (p - buf);
}
@@ -355,7 +355,7 @@ edd_show_default_sectors_per_track(struc
return -EINVAL;
}
- p += snprintf(p, left, "0x%x\n", info->params.sectors_per_track);
+ p += scnprintf(p, left, "0x%x\n", info->params.sectors_per_track);
return (p - buf);
}
@@ -368,7 +368,7 @@ edd_show_sectors(struct edd_device *edev
return -EINVAL;
}
- p += snprintf(p, left, "0x%llx\n", info->params.number_of_sectors);
+ p += scnprintf(p, left, "0x%llx\n", info->params.number_of_sectors);
return (p - buf);
}
--- linux-2.6.3/arch/i386/kernel/entry.S 2003-11-23 19:03:00.000000000 -0800
+++ 25/arch/i386/kernel/entry.S 2004-02-20 00:21:53.000000000 -0800
@@ -43,11 +43,25 @@
#include
#include
#include
+#include
#include
#include
+#include
#include
#include
#include "irq_vectors.h"
+ /* We do not recover from a stack overflow, but at least
+ * we know it happened and should be able to track it down.
+ */
+#ifdef CONFIG_STACK_OVERFLOW_TEST
+#define STACK_OVERFLOW_TEST \
+ testl $7680,%esp; \
+ jnz 10f; \
+ call stack_overflow; \
+10:
+#else
+#define STACK_OVERFLOW_TEST
+#endif
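As a reading aid for the STACK_OVERFLOW_TEST macro above: 7680 is 0x1e00, i.e. bits 9..12 of %esp, so the test fires when the stack pointer is within 512 bytes of the bottom of the (assumed 8 KiB) kernel stack. A minimal C sketch of the same check, with report_stack_overflow() standing in for the real stack_overflow call:

#include <stdio.h>

/* Stand-in for the kernel's stack_overflow() reporting hook. */
static void report_stack_overflow(void)
{
        fprintf(stderr, "kernel stack nearly exhausted\n");
}

/* Equivalent of "testl $7680,%esp; jnz 10f; call stack_overflow; 10:". */
static void stack_overflow_test(unsigned long esp)
{
        if ((esp & 0x1e00) == 0)        /* fewer than 512 bytes left */
                report_stack_overflow();
}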
#define nr_syscalls ((syscall_table_size)/4)
@@ -87,7 +101,102 @@ TSS_ESP0_OFFSET = (4 - 0x200)
#define resume_kernel restore_all
#endif
-#define SAVE_ALL \
+#ifdef CONFIG_X86_HIGH_ENTRY
+
+#ifdef CONFIG_X86_SWITCH_PAGETABLES
+
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+/*
+ * If the task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu,
+ * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is
+ * left stale, so we must check whether to repeat the real stack calculation.
+ */
+#define repeat_if_esp_changed \
+ xorl %esp, %ebp; \
+ testl $0xffffe000, %ebp; \
+ jnz 0b
+#else
+#define repeat_if_esp_changed
+#endif
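The repeat_if_esp_changed check can be read as the C sketch below (an assumption of 8 KiB, 8 KiB-aligned stacks; cached_base is the thread_info base that %ebp still holds from the previous pass):

/* Returns non-zero when the current %esp no longer lies in the stack
 * whose base was cached earlier, i.e. the task migrated and the real
 * stack address must be recomputed. */
static int esp_moved(unsigned long esp, unsigned long cached_base)
{
        return ((esp ^ cached_base) & 0xffffe000UL) != 0;
}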
+
+/* clobbers ebx, edx and ebp */
+
+#define __SWITCH_KERNELSPACE \
+ cmpl $0xff000000, %esp; \
+ jb 1f; \
+ \
+ /* \
+ * switch pagetables and load the real stack, \
+ * keep the stack offset: \
+ */ \
+ \
+ movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \
+ \
+ /* GET_THREAD_INFO(%ebp) intermixed */ \
+0: \
+ movl %esp, %ebp; \
+ movl %esp, %ebx; \
+ andl $0xffffe000, %ebp; \
+ andl $0x00001fff, %ebx; \
+ orl TI_real_stack(%ebp), %ebx; \
+ repeat_if_esp_changed; \
+ \
+ movl %edx, %cr3; \
+ movl %ebx, %esp; \
+1:
+
+#endif
+
+
+#define __SWITCH_USERSPACE \
+ /* interrupted any of the user return paths? */ \
+ \
+ movl EIP(%esp), %eax; \
+ \
+ cmpl $int80_ret_start_marker, %eax; \
+ jb 33f; /* nope - continue with sysexit check */\
+ cmpl $int80_ret_end_marker, %eax; \
+ jb 22f; /* yes - switch to virtual stack */ \
+33: \
+ cmpl $sysexit_ret_start_marker, %eax; \
+ jb 44f; /* nope - continue with user check */ \
+ cmpl $sysexit_ret_end_marker, %eax; \
+ jb 22f; /* yes - switch to virtual stack */ \
+ /* return to userspace? */ \
+44: \
+ movl EFLAGS(%esp),%ecx; \
+ movb CS(%esp),%cl; \
+ testl $(VM_MASK | 3),%ecx; \
+ jz 2f; \
+22: \
+ /* \
+ * switch to the virtual stack, then switch to \
+ * the userspace pagetables. \
+ */ \
+ \
+ GET_THREAD_INFO(%ebp); \
+ movl TI_virtual_stack(%ebp), %edx; \
+ movl TI_user_pgd(%ebp), %ecx; \
+ \
+ movl %esp, %ebx; \
+ andl $0x1fff, %ebx; \
+ orl %ebx, %edx; \
+int80_ret_start_marker: \
+ movl %edx, %esp; \
+ movl %ecx, %cr3; \
+ \
+ __RESTORE_ALL; \
+int80_ret_end_marker: \
+2:
+
+#else /* !CONFIG_X86_HIGH_ENTRY */
+
+#define __SWITCH_KERNELSPACE
+#define __SWITCH_USERSPACE
+
+#endif
+
+#define __SAVE_ALL \
cld; \
pushl %es; \
pushl %ds; \
@@ -102,7 +211,7 @@ TSS_ESP0_OFFSET = (4 - 0x200)
movl %edx, %ds; \
movl %edx, %es;
-#define RESTORE_INT_REGS \
+#define __RESTORE_INT_REGS \
popl %ebx; \
popl %ecx; \
popl %edx; \
@@ -111,29 +220,28 @@ TSS_ESP0_OFFSET = (4 - 0x200)
popl %ebp; \
popl %eax
-#define RESTORE_REGS \
- RESTORE_INT_REGS; \
-1: popl %ds; \
-2: popl %es; \
+#define __RESTORE_REGS \
+ __RESTORE_INT_REGS; \
+111: popl %ds; \
+222: popl %es; \
.section .fixup,"ax"; \
-3: movl $0,(%esp); \
- jmp 1b; \
-4: movl $0,(%esp); \
- jmp 2b; \
+444: movl $0,(%esp); \
+ jmp 111b; \
+555: movl $0,(%esp); \
+ jmp 222b; \
.previous; \
.section __ex_table,"a";\
.align 4; \
- .long 1b,3b; \
- .long 2b,4b; \
+ .long 111b,444b;\
+ .long 222b,555b;\
.previous
-
-#define RESTORE_ALL \
- RESTORE_REGS \
+#define __RESTORE_ALL \
+ __RESTORE_REGS \
addl $4, %esp; \
-1: iret; \
+333: iret; \
.section .fixup,"ax"; \
-2: sti; \
+666: sti; \
movl $(__USER_DS), %edx; \
movl %edx, %ds; \
movl %edx, %es; \
@@ -142,10 +250,19 @@ TSS_ESP0_OFFSET = (4 - 0x200)
.previous; \
.section __ex_table,"a";\
.align 4; \
- .long 1b,2b; \
+ .long 333b,666b;\
.previous
+#define SAVE_ALL \
+ __SAVE_ALL; \
+ __SWITCH_KERNELSPACE; \
+ STACK_OVERFLOW_TEST;
+
+#define RESTORE_ALL \
+ __SWITCH_USERSPACE; \
+ __RESTORE_ALL;
+.section .entry.text,"ax"
ENTRY(lcall7)
pushfl # We get a different stack layout with call
@@ -163,7 +280,7 @@ do_lcall:
movl %edx,EIP(%ebp) # Now we move them to their "normal" places
movl %ecx,CS(%ebp) #
andl $-8192, %ebp # GET_THREAD_INFO
- movl TI_EXEC_DOMAIN(%ebp), %edx # Get the execution domain
+ movl TI_exec_domain(%ebp), %edx # Get the execution domain
call *4(%edx) # Call the lcall7 handler for the domain
addl $4, %esp
popl %eax
@@ -208,7 +325,7 @@ ENTRY(resume_userspace)
cli # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
- movl TI_FLAGS(%ebp), %ecx
+ movl TI_flags(%ebp), %ecx
andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
# int/exception return?
jne work_pending
@@ -216,18 +333,18 @@ ENTRY(resume_userspace)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
- cmpl $0,TI_PRE_COUNT(%ebp) # non-zero preempt_count ?
+ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
jnz restore_all
need_resched:
- movl TI_FLAGS(%ebp), %ecx # need_resched set ?
+ movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl
jz restore_all
testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all
- movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebp)
+ movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp)
sti
call schedule
- movl $0,TI_PRE_COUNT(%ebp)
+ movl $0,TI_preempt_count(%ebp)
cli
jmp need_resched
#endif
@@ -246,37 +363,50 @@ sysenter_past_esp:
pushl $(__USER_CS)
pushl $SYSENTER_RETURN
-/*
- * Load the potential sixth argument from user stack.
- * Careful about security.
- */
- cmpl $__PAGE_OFFSET-3,%ebp
- jae syscall_fault
-1: movl (%ebp),%ebp
-.section __ex_table,"a"
- .align 4
- .long 1b,syscall_fault
-.previous
-
pushl %eax
SAVE_ALL
GET_THREAD_INFO(%ebp)
cmpl $(nr_syscalls), %eax
jae syscall_badsys
- testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp)
+ testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp)
jnz syscall_trace_entry
call *sys_call_table(,%eax,4)
movl %eax,EAX(%esp)
cli
- movl TI_FLAGS(%ebp), %ecx
+ movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx
jne syscall_exit_work
+
+#ifdef CONFIG_X86_SWITCH_PAGETABLES
+
+ GET_THREAD_INFO(%ebp)
+ movl TI_virtual_stack(%ebp), %edx
+ movl TI_user_pgd(%ebp), %ecx
+ movl %esp, %ebx
+ andl $0x1fff, %ebx
+ orl %ebx, %edx
+sysexit_ret_start_marker:
+ movl %edx, %esp
+ movl %ecx, %cr3
+#endif
+ /*
+ * only ebx is not restored by the userspace sysenter vsyscall
+ * code; it assumes it to be callee-saved.
+ */
+ movl EBX(%esp), %ebx
+
/* if something modifies registers it must also disable sysexit */
+
movl EIP(%esp), %edx
movl OLDESP(%esp), %ecx
+
sti
sysexit
+#ifdef CONFIG_X86_SWITCH_PAGETABLES
+sysexit_ret_end_marker:
+ nop
+#endif
# system call handler stub
@@ -287,7 +417,7 @@ ENTRY(system_call)
cmpl $(nr_syscalls), %eax
jae syscall_badsys
# system call tracing in operation
- testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp)
+ testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp)
jnz syscall_trace_entry
syscall_call:
call *sys_call_table(,%eax,4)
@@ -296,10 +426,23 @@ syscall_exit:
cli # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
- movl TI_FLAGS(%ebp), %ecx
+ movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx # current->work
jne syscall_exit_work
restore_all:
+#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS
+ movl EFLAGS(%esp), %eax # mix EFLAGS and CS
+ movb CS(%esp), %al
+ testl $(VM_MASK | 3), %eax
+ jz resume_kernelX # returning to kernel or vm86-space
+
+ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
+ jz resume_kernelX
+
+ int $3
+
+resume_kernelX:
+#endif
RESTORE_ALL
# perform work that needs to be done immediately before resumption
@@ -312,7 +455,7 @@ work_resched:
cli # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
- movl TI_FLAGS(%ebp), %ecx
+ movl TI_flags(%ebp), %ecx
andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
# than syscall tracing?
jz restore_all
@@ -327,6 +470,22 @@ work_notifysig: # deal with pending s
# vm86-space
xorl %edx, %edx
call do_notify_resume
+
+#ifdef CONFIG_X86_HIGH_ENTRY
+ /*
+ * Reload db7 if necessary:
+ */
+ movl TI_flags(%ebp), %ecx
+ testb $_TIF_DB7, %cl
+ jnz work_db7
+
+ jmp restore_all
+
+work_db7:
+ movl TI_task(%ebp), %edx;
+ movl task_thread_db7(%edx), %edx;
+ movl %edx, %db7;
+#endif
jmp restore_all
ALIGN
@@ -382,7 +541,7 @@ syscall_badsys:
*/
.data
ENTRY(interrupt)
-.text
+.previous
vector=0
ENTRY(irq_entries_start)
@@ -392,7 +551,7 @@ ENTRY(irq_entries_start)
jmp common_interrupt
.data
.long 1b
-.text
+.previous
vector=vector+1
.endr
@@ -433,12 +592,17 @@ error_code:
movl ES(%esp), %edi # get the function address
movl %eax, ORIG_EAX(%esp)
movl %ecx, ES(%esp)
- movl %esp, %edx
pushl %esi # push the error code
- pushl %edx # push the pt_regs pointer
movl $(__USER_DS), %edx
movl %edx, %ds
movl %edx, %es
+
+/* clobbers edx, ebx and ebp */
+ __SWITCH_KERNELSPACE
+
+ leal 4(%esp), %edx # prepare pt_regs
+ pushl %edx # push pt_regs
+
call *%edi
addl $8, %esp
jmp ret_from_exception
@@ -515,8 +679,8 @@ ENTRY(nmi)
/* Do not access memory above the end of our stack page,
* it might not exist.
*/
- andl $0x1fff,%eax
- cmpl $0x1fec,%eax
+ andl $(THREAD_SIZE-1),%eax
+ cmpl $(THREAD_SIZE-20),%eax
popl %eax
jae nmi_stack_correct
cmpl $sysenter_entry,12(%esp)
@@ -529,7 +693,7 @@ nmi_stack_correct:
pushl %edx
call do_nmi
addl $8, %esp
- RESTORE_ALL
+ jmp restore_all
nmi_stack_fixup:
FIX_STACK(12,nmi_stack_correct, 1)
@@ -606,6 +770,8 @@ ENTRY(spurious_interrupt_bug)
pushl $do_spurious_interrupt_bug
jmp error_code
+.previous
+
.data
ENTRY(sys_call_table)
.long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
--- /dev/null 2002-08-30 16:31:37.000000000 -0700
+++ 25/arch/i386/kernel/entry_trampoline.c 2004-02-20 00:21:53.000000000 -0800
@@ -0,0 +1,73 @@
+/*
+ * linux/arch/i386/kernel/entry_trampoline.c
+ *
+ * (C) Copyright 2003 Ingo Molnar
+ *
+ * This file contains the needed support code for 4GB userspace
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text;
+
+void __init init_entry_mappings(void)
+{
+#ifdef CONFIG_X86_HIGH_ENTRY
+ void *tramp;
+
+ /*
+ * We need a high IDT and GDT for the 4G/4G split:
+ */
+ trap_init_virtual_IDT();
+
+ __set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL);
+ __set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL);
+ tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0);
+
+ printk("mapped 4G/4G trampoline to %p.\n", tramp);
+ BUG_ON((void *)&__start___entry_text != tramp);
+ /*
+ * Virtual kernel stack:
+ */
+ BUG_ON(__kmap_atomic_vaddr(KM_VSTACK0) & 8191);
+ BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE);
+ BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE);
+
+ /*
+ * set up the initial thread's virtual stack related
+ * fields:
+ */
+ current->thread.stack_page0 = virt_to_page((char *)current->thread_info);
+ current->thread.stack_page1 = virt_to_page((char *)current->thread_info + PAGE_SIZE);
+ current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK0);
+
+ __kunmap_atomic_type(KM_VSTACK0);
+ __kunmap_atomic_type(KM_VSTACK1);
+ __kmap_atomic(current->thread.stack_page0, KM_VSTACK0);
+ __kmap_atomic(current->thread.stack_page1, KM_VSTACK1);
+
+#endif
+ current->thread_info->real_stack = (void *)current->thread_info;
+ current->thread_info->user_pgd = NULL;
+ current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE;
+}
+
+
+
+void __init entry_trampoline_setup(void)
+{
+ /*
+ * old IRQ entries set up by the boot code will still hang
+ * around - they are a sign of hw trouble anyway; now they'll
+ * produce a double fault message.
+ */
+ trap_init_virtual_GDT();
+}
--- linux-2.6.3/arch/i386/kernel/head.S 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/kernel/head.S 2004-02-20 00:21:54.000000000 -0800
@@ -16,6 +16,8 @@
#include
#include
#include
+#include
+#include
#define OLD_CL_MAGIC_ADDR 0x90020
#define OLD_CL_MAGIC 0xA33F
@@ -325,12 +327,12 @@ rp_sidt:
ret
ENTRY(stack_start)
- .long init_thread_union+8192
+ .long init_thread_union+THREAD_SIZE
.long __BOOT_DS
/* This is the default interrupt "handler" :-) */
int_msg:
- .asciz "Unknown interrupt\n"
+ .asciz "Unknown interrupt or fault at EIP %p %p %p\n"
ALIGN
ignore_int:
cld
@@ -342,9 +344,17 @@ ignore_int:
movl $(__KERNEL_DS),%eax
movl %eax,%ds
movl %eax,%es
+ pushl 16(%esp)
+ pushl 24(%esp)
+ pushl 32(%esp)
+ pushl 40(%esp)
pushl $int_msg
call printk
popl %eax
+ popl %eax
+ popl %eax
+ popl %eax
+ popl %eax
popl %ds
popl %es
popl %edx
@@ -377,23 +387,27 @@ cpu_gdt_descr:
.fill NR_CPUS-1,8,0 # space for the other GDT descriptors
/*
- * This is initialized to create an identity-mapping at 0-8M (for bootup
- * purposes) and another mapping of the 0-8M area at virtual address
+ * This is initialized to create an identity-mapping at 0-16M (for bootup
+ * purposes) and another mapping of the 0-16M area at virtual address
* PAGE_OFFSET.
*/
.org 0x1000
ENTRY(swapper_pg_dir)
.long 0x00102007
.long 0x00103007
- .fill BOOT_USER_PGD_PTRS-2,4,0
- /* default: 766 entries */
+ .long 0x00104007
+ .long 0x00105007
+ .fill BOOT_USER_PGD_PTRS-4,4,0
+ /* default: 764 entries */
.long 0x00102007
.long 0x00103007
- /* default: 254 entries */
- .fill BOOT_KERNEL_PGD_PTRS-2,4,0
+ .long 0x00104007
+ .long 0x00105007
+ /* default: 252 entries */
+ .fill BOOT_KERNEL_PGD_PTRS-4,4,0
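For reference, each of the swapper_pg_dir values above is a standard x86 page-directory entry: 0x00102007, for instance, points at a page table at physical 0x00102000 with the PRESENT, RW and USER bits (0x007) set, and each entry maps 4 MB, which is why four entries (pg0..pg3) are now needed for the 0-16 MB boot mapping. A small illustrative decoder (not part of the patch):

#include <stdio.h>

int main(void)
{
        unsigned long pde = 0x00102007UL;       /* first swapper_pg_dir entry */

        printf("page table at %#010lx, flags %#05lx\n",
               pde & 0xfffff000UL,              /* 0x00102000 */
               pde & 0x00000fffUL);             /* 0x007 = PRESENT | RW | USER */
        return 0;
}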
/*
- * The page tables are initialized to only 8MB here - the final page
+ * The page tables are initialized to only 16MB here - the final page
* tables are set up later depending on memory size.
*/
.org 0x2000
@@ -402,15 +416,21 @@ ENTRY(pg0)
.org 0x3000
ENTRY(pg1)
+.org 0x4000
+ENTRY(pg2)
+
+.org 0x5000
+ENTRY(pg3)
+
/*
* empty_zero_page must immediately follow the page tables ! (The
* initialization loop counts until empty_zero_page)
*/
-.org 0x4000
+.org 0x6000
ENTRY(empty_zero_page)
-.org 0x5000
+.org 0x7000
/*
* Real beginning of normal "text" segment
@@ -419,12 +439,12 @@ ENTRY(stext)
ENTRY(_stext)
/*
- * This starts the data section. Note that the above is all
- * in the text section because it has alignment requirements
- * that we cannot fulfill any other way.
+ * This starts the data section.
*/
.data
+.align PAGE_SIZE_asm
+
/*
* The Global Descriptor Table contains 28 quadwords, per-CPU.
*/
@@ -439,7 +459,9 @@ ENTRY(boot_gdt_table)
.quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
.quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
#endif
- .align L1_CACHE_BYTES
+
+.align PAGE_SIZE_asm
+
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x0000000000000000 /* 0x0b reserved */
--- linux-2.6.3/arch/i386/kernel/i386_ksyms.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/kernel/i386_ksyms.c 2004-02-20 00:21:54.000000000 -0800
@@ -97,7 +97,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter
EXPORT_SYMBOL_NOVERS(__down_failed_trylock);
EXPORT_SYMBOL_NOVERS(__up_wakeup);
/* Networking helper routines. */
-EXPORT_SYMBOL(csum_partial_copy_generic);
/* Delay loops */
EXPORT_SYMBOL(__ndelay);
EXPORT_SYMBOL(__udelay);
@@ -111,13 +110,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4);
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);
+#if !defined(CONFIG_X86_UACCESS_INDIRECT)
EXPORT_SYMBOL(strncpy_from_user);
-EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(__direct_strncpy_from_user);
EXPORT_SYMBOL(clear_user);
EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(__copy_from_user_ll);
EXPORT_SYMBOL(__copy_to_user_ll);
EXPORT_SYMBOL(strnlen_user);
+#else /* CONFIG_X86_UACCESS_INDIRECT */
+EXPORT_SYMBOL(direct_csum_partial_copy_generic);
+#endif
EXPORT_SYMBOL(dma_alloc_coherent);
EXPORT_SYMBOL(dma_free_coherent);
--- linux-2.6.3/arch/i386/kernel/i387.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/kernel/i387.c 2004-02-20 00:21:54.000000000 -0800
@@ -218,6 +218,7 @@ void set_fpu_mxcsr( struct task_struct *
static int convert_fxsr_to_user( struct _fpstate __user *buf,
struct i387_fxsave_struct *fxsave )
{
+ struct _fpreg tmp[8]; /* 80 bytes scratch area */
unsigned long env[7];
struct _fpreg __user *to;
struct _fpxreg *from;
@@ -234,23 +235,25 @@ static int convert_fxsr_to_user( struct
if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) )
return 1;
- to = &buf->_st[0];
+ to = tmp;
from = (struct _fpxreg *) &fxsave->st_space[0];
for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
unsigned long *t = (unsigned long *)to;
unsigned long *f = (unsigned long *)from;
- if (__put_user(*f, t) ||
- __put_user(*(f + 1), t + 1) ||
- __put_user(from->exponent, &to->exponent))
- return 1;
+ *t = *f;
+ *(t + 1) = *(f+1);
+ to->exponent = from->exponent;
}
+ if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8])))
+ return 1;
return 0;
}
static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
struct _fpstate __user *buf )
{
+ struct _fpreg tmp[8]; /* 80 bytes scratch area */
unsigned long env[7];
struct _fpxreg *to;
struct _fpreg __user *from;
@@ -258,6 +261,8 @@ static int convert_fxsr_from_user( struc
if ( __copy_from_user( env, buf, 7 * sizeof(long) ) )
return 1;
+ if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8])))
+ return 1;
fxsave->cwd = (unsigned short)(env[0] & 0xffff);
fxsave->swd = (unsigned short)(env[1] & 0xffff);
@@ -269,15 +274,14 @@ static int convert_fxsr_from_user( struc
fxsave->fos = env[6];
to = (struct _fpxreg *) &fxsave->st_space[0];
- from = &buf->_st[0];
+ from = tmp;
for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
unsigned long *t = (unsigned long *)to;
unsigned long *f = (unsigned long *)from;
- if (__get_user(*t, f) ||
- __get_user(*(t + 1), f + 1) ||
- __get_user(to->exponent, &from->exponent))
- return 1;
+ *t = *f;
+ *(t + 1) = *(f + 1);
+ to->exponent = from->exponent;
}
return 0;
}
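The i387.c change above replaces the per-register __put_user()/__get_user() loops with a bounded kernel scratch buffer and a single copy_to_user()/copy_from_user(). A minimal sketch of that staging pattern, with a hypothetical helper name (not code from the patch):

#include <linux/errno.h>
#include <linux/string.h>
#include <asm/uaccess.h>

/* Stage converted data in a fixed-size kernel buffer, then push it to
 * user space with one copy_to_user() instead of many __put_user()s. */
static int stage_and_copy_to_user(void __user *dst, const void *src, size_t len)
{
        char tmp[80];                   /* bounded scratch area, as above */

        if (len > sizeof(tmp))
                return -EINVAL;
        memcpy(tmp, src, len);
        return copy_to_user(dst, tmp, len) ? -EFAULT : 0;
}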
--- linux-2.6.3/arch/i386/kernel/i8259.c 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/i386/kernel/i8259.c 2004-02-20 00:20:36.000000000 -0800
@@ -258,7 +258,7 @@ static int __init i8259A_init_sysfs(void
{
int error = sysdev_class_register(&i8259_sysdev_class);
if (!error)
- error = sys_device_register(&device_i8259A);
+ error = sysdev_register(&device_i8259A);
return error;
}
@@ -401,7 +401,7 @@ static int __init init_timer_sysfs(void)
{
int error = sysdev_class_register(&timer_sysclass);
if (!error)
- error = sys_device_register(&device_timer);
+ error = sysdev_register(&device_timer);
return error;
}
--- linux-2.6.3/arch/i386/kernel/init_task.c 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/kernel/init_task.c 2004-02-20 00:21:54.000000000 -0800
@@ -26,7 +26,7 @@ EXPORT_SYMBOL(init_mm);
*/
union thread_union init_thread_union
__attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+ { INIT_THREAD_INFO(init_task, init_thread_union) };
/*
* Initial task structure.
@@ -44,5 +44,5 @@ EXPORT_SYMBOL(init_task);
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS };
+struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS };
--- linux-2.6.3/arch/i386/kernel/io_apic.c 2004-02-03 20:42:34.000000000 -0800
+++ 25/arch/i386/kernel/io_apic.c 2004-02-20 00:20:52.000000000 -0800
@@ -280,7 +280,7 @@ static void set_ioapic_affinity_irq(unsi
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-#if defined(CONFIG_SMP)
+#if defined(CONFIG_IRQBALANCE)
# include /* kernel_thread() */
# include /* kstat */
# include /* kmalloc() */
@@ -317,8 +317,7 @@ struct irq_cpu_info {
#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
-#define CPU_TO_PACKAGEINDEX(i) \
- ((physical_balance && i > cpu_sibling_map[i]) ? cpu_sibling_map[i] : i)
+#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
@@ -401,6 +400,7 @@ static void do_irq_balance(void)
unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
unsigned long move_this_load = 0;
int max_loaded = 0, min_loaded = 0;
+ int load;
unsigned long useful_load_threshold = balanced_irq_interval + 10;
int selected_irq;
int tmp_loaded, first_attempt = 1;
@@ -452,7 +452,7 @@ static void do_irq_balance(void)
for (i = 0; i < NR_CPUS; i++) {
if (!cpu_online(i))
continue;
- if (physical_balance && i > cpu_sibling_map[i])
+ if (i != CPU_TO_PACKAGEINDEX(i))
continue;
if (min_cpu_irq > CPU_IRQ(i)) {
min_cpu_irq = CPU_IRQ(i);
@@ -471,7 +471,7 @@ tryanothercpu:
for (i = 0; i < NR_CPUS; i++) {
if (!cpu_online(i))
continue;
- if (physical_balance && i > cpu_sibling_map[i])
+ if (i != CPU_TO_PACKAGEINDEX(i))
continue;
if (max_cpu_irq <= CPU_IRQ(i))
continue;
@@ -551,9 +551,14 @@ tryanotherirq:
* We seek the least loaded sibling by making the comparison
* (A+B)/2 vs B
*/
- if (physical_balance && (CPU_IRQ(min_loaded) >> 1) >
- CPU_IRQ(cpu_sibling_map[min_loaded]))
- min_loaded = cpu_sibling_map[min_loaded];
+ load = CPU_IRQ(min_loaded) >> 1;
+ for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
+ if (load > CPU_IRQ(j)) {
+ /* This won't change cpu_sibling_map[min_loaded] */
+ load = CPU_IRQ(j);
+ min_loaded = j;
+ }
+ }
cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
target_cpu_mask = cpumask_of_cpu(min_loaded);
@@ -689,9 +694,11 @@ static inline void move_irq(int irq)
__initcall(balanced_irq_init);
-#else /* !SMP */
+#else /* !CONFIG_IRQBALANCE */
static inline void move_irq(int irq) { }
+#endif /* CONFIG_IRQBALANCE */
+#ifndef CONFIG_SMP
void send_IPI_self(int vector)
{
unsigned int cfg;
@@ -706,7 +713,7 @@ void send_IPI_self(int vector)
*/
apic_write_around(APIC_ICR, cfg);
}
-#endif /* defined(CONFIG_SMP) */
+#endif /* !CONFIG_SMP */
/*
@@ -2150,6 +2157,10 @@ static inline void check_timer(void)
{
int pin1, pin2;
int vector;
+ unsigned int ver;
+
+ ver = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(ver);
/*
* get/set the timer IRQ vector:
@@ -2163,11 +2174,17 @@ static inline void check_timer(void)
* mode for the 8259A whenever interrupts are routed
* through I/O APICs. Also IRQ0 has to be enabled in
* the 8259A which implies the virtual wire has to be
- * disabled in the local APIC.
+ * disabled in the local APIC. Finally timer interrupts
+ * need to be acknowledged manually in the 8259A for
+ * do_slow_timeoffset() and for the i82489DX when using
+ * the NMI watchdog.
*/
apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
init_8259A(1);
- timer_ack = 1;
+ if (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver))
+ timer_ack = 1;
+ else
+ timer_ack = !cpu_has_tsc;
enable_8259A_irq(0);
pin1 = find_isa_irq_pin(0, mp_INT);
@@ -2185,7 +2202,8 @@ static inline void check_timer(void)
disable_8259A_irq(0);
setup_nmi();
enable_8259A_irq(0);
- check_nmi_watchdog();
+ if (check_nmi_watchdog() < 0)
+ timer_ack = !cpu_has_tsc;
}
return;
}
@@ -2208,7 +2226,8 @@ static inline void check_timer(void)
add_pin_to_irq(0, 0, pin2);
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
- check_nmi_watchdog();
+ if (check_nmi_watchdog() < 0)
+ timer_ack = !cpu_has_tsc;
}
return;
}
--- linux-2.6.3/arch/i386/kernel/irq.c 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/i386/kernel/irq.c 2004-02-20 00:20:42.000000000 -0800
@@ -435,7 +435,7 @@ asmlinkage unsigned int do_IRQ(struct pt
long esp;
__asm__ __volatile__("andl %%esp,%0" :
- "=r" (esp) : "0" (8191));
+ "=r" (esp) : "0" (THREAD_SIZE - 1));
if (unlikely(esp < (sizeof(struct thread_info) + 1024))) {
printk("do_IRQ: stack overflow: %ld\n",
esp - sizeof(struct thread_info));
@@ -508,6 +508,8 @@ out:
irq_exit();
+ kgdb_process_breakpoint();
+
return 1;
}
@@ -927,7 +929,7 @@ cpumask_t irq_affinity[NR_IRQS] = { [0 .
static int irq_affinity_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- int len = cpumask_snprintf(page, count, irq_affinity[(long)data]);
+ int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
if (count - len < 2)
return -EINVAL;
len += sprintf(page + len, "\n");
@@ -968,7 +970,7 @@ static int irq_affinity_write_proc(struc
static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- int len = cpumask_snprintf(page, count, *(cpumask_t *)data);
+ int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
if (count - len < 2)
return -EINVAL;
len += sprintf(page + len, "\n");
--- /dev/null 2002-08-30 16:31:37.000000000 -0700
+++ 25/arch/i386/kernel/kgdb_stub.c 2004-02-20 00:20:43.000000000 -0800
@@ -0,0 +1,2457 @@
+/*
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+/*
+ * Copyright (c) 2000 VERITAS Software Corporation.
+ *
+ */
+/****************************************************************************
+ * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $
+ *
+ * Module name: remcom.c $
+ * Revision: 1.34 $
+ * Date: 91/03/09 12:29:49 $
+ * Contributor: Lake Stevens Instrument Division$
+ *
+ * Description: low level support for gdb debugger. $
+ *
+ * Considerations: only works on target hardware $
+ *
+ * Written by: Glenn Engel $
+ * Updated by: David Grothe
+ * Updated by: Robert Walsh
+ * Updated by: wangdi
+ * ModuleState: Experimental $
+ *
+ * NOTES: See Below $
+ *
+ * Modified for 386 by Jim Kingdon, Cygnus Support.
+ * Compatibility with 2.1.xx kernel by David Grothe
+ *
+ * Changes to allow auto initialization. All that is needed is that it
+ * be linked with the kernel and a break point (int 3) be executed.
+ * The header file defines BREAKPOINT to allow one to do
+ * this. It should also be possible, once the interrupt system is up, to
+ * call putDebugChar("+"). Once this is done, the remote debugger should
+ * get our attention by sending a ^C in a packet. George Anzinger
+ *
+ * Integrated into 2.2.5 kernel by Tigran Aivazian
+ * Added thread support, support for multiple processors,
+ * support for ia-32(x86) hardware debugging.
+ * Amit S. Kale ( akale@veritas.com )
+ *
+ * Modified to support debugging over ethernet by Robert Walsh
+ * and wangdi , based on
+ * code by San Mehat.
+ *
+ *
+ * To enable debugger support, two things need to happen. One, a
+ * call to set_debug_traps() is necessary in order to allow any breakpoints
+ * or error conditions to be properly intercepted and reported to gdb.
+ * Two, a breakpoint needs to be generated to begin communication. This
+ * is most easily accomplished by a call to breakpoint(). Breakpoint()
+ * simulates a breakpoint by executing an int 3.
+ *
+ *************
+ *
+ * The following gdb commands are supported:
+ *
+ * command          function                                 Return value
+ *
+ *    g             return the value of the CPU registers    hex data or ENN
+ *    G             set the value of the CPU registers       OK or ENN
+ *
+ *    mAA..AA,LLLL  Read LLLL bytes at address AA..AA        hex data or ENN
+ *    MAA..AA,LLLL: Write LLLL bytes at address AA.AA        OK or ENN
+ *
+ *    c             Resume at current address                SNN ( signal NN)
+ *    cAA..AA       Continue at address AA..AA               SNN
+ *
+ *    s             Step one instruction                     SNN
+ *    sAA..AA       Step one instruction from AA..AA         SNN
+ *
+ *    k             kill
+ *
+ *    ?             What was the last sigval ?               SNN (signal NN)
+ *
+ * All commands and responses are sent with a packet which includes a
+ * checksum. A packet consists of
+ *
+ * $<packet info>#<checksum>.
+ *
+ * where
+ * <packet info> :: <characters representing the command or response>
+ * <checksum> :: <two hex digits computed as modulo 256 sum of <packet info>>
+ *
+ * When a packet is received, it is first acknowledged with either '+' or '-'.
+ * '+' indicates a successful transfer. '-' indicates a failed transfer.
+ *
+ * Example:
+ *
+ *    Host:                  Reply:
+ *    $m0,10#2a              +$00010203040506070809101112131415#42
+ *
+ ****************************************************************************/
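A usage sketch of the two steps the comment above describes (the init-function name is hypothetical; set_debug_traps() and breakpoint() are the entry points the stub documents):

extern void set_debug_traps(void);
extern void breakpoint(void);

void kgdb_startup_example(void)
{
        set_debug_traps();      /* step one: hook breakpoints and faults */
        breakpoint();           /* step two: int 3, the remote gdb takes over */
}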
+#define KGDB_VERSION "<20030915.1651.33>"
+#include
+#include
+#include /* for strcpy */
+#include
+#include
+#include
+#include
+#include /* for linux pt_regs struct */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/************************************************************************
+ *
+ * external low-level support routines
+ */
+typedef void (*Function) (void); /* pointer to a function */
+
+/* Thread reference */
+typedef unsigned char threadref[8];
+
+extern int tty_putDebugChar(int); /* write a single character */
+extern int tty_getDebugChar(void); /* read and return a single char */
+extern void tty_flushDebugChar(void); /* flush pending characters */
+extern int eth_putDebugChar(int); /* write a single character */
+extern int eth_getDebugChar(void); /* read and return a single char */
+extern void eth_flushDebugChar(void); /* flush pending characters */
+
+/************************************************************************/
+/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/
+/* at least NUMREGBYTES*2 are needed for register packets */
+/* Longer buffer is needed to list all threads */
+#define BUFMAX 400
+
+char *kgdb_version = KGDB_VERSION;
+
+/* debug > 0 prints ill-formed commands in valid packets & checksum errors */
+int debug_regs = 0; /* set to non-zero to print registers */
+
+/* filled in by an external module */
+char *gdb_module_offsets;
+
+static const char hexchars[] = "0123456789abcdef";
+
+/* Number of bytes of registers. */
+#define NUMREGBYTES 64
+/*
+ * Note that this register image is in a different order than
+ * the register image that Linux produces at interrupt time.
+ *
+ * Linux's register image is defined by struct pt_regs in ptrace.h.
+ * Just why GDB uses a different order is a historical mystery.
+ */
+enum regnames { _EAX, /* 0 */
+ _ECX, /* 1 */
+ _EDX, /* 2 */
+ _EBX, /* 3 */
+ _ESP, /* 4 */
+ _EBP, /* 5 */
+ _ESI, /* 6 */
+ _EDI, /* 7 */
+ _PC /* 8 also known as eip */ ,
+ _PS /* 9 also known as eflags */ ,
+ _CS, /* 10 */
+ _SS, /* 11 */
+ _DS, /* 12 */
+ _ES, /* 13 */
+ _FS, /* 14 */
+ _GS /* 15 */
+};
+
+/*************************** ASSEMBLY CODE MACROS *************************/
+/*
+ * Put the error code here just in case the user cares.
+ * Likewise, the vector number here (since GDB only gets the signal
+ * number through the usual means, and that's not very specific).
+ * The called_from is the return address so he can tell how we entered kgdb.
+ * This will allow him to separate out the various possible entries.
+ */
+#define REMOTE_DEBUG 0 /* set != 0 to turn on printing (also available in info) */
+
+#define PID_MAX PID_MAX_DEFAULT
+
+#ifdef CONFIG_SMP
+void smp_send_nmi_allbutself(void);
+#define IF_SMP(x) x
+#undef MAX_NO_CPUS
+#ifndef CONFIG_NO_KGDB_CPUS
+#define CONFIG_NO_KGDB_CPUS 2
+#endif
+#if CONFIG_NO_KGDB_CPUS > NR_CPUS
+#define MAX_NO_CPUS NR_CPUS
+#else
+#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS
+#endif
+#define hold_init hold_on_sstep: 1,
+#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL)
+#define NUM_CPUS num_online_cpus()
+#else
+#define IF_SMP(x)
+#define hold_init
+#undef MAX_NO_CPUS
+#define MAX_NO_CPUS 1
+#define NUM_CPUS 1
+#endif
+#define NOCPU (struct task_struct *)0xbad1fbad
+/* *INDENT-OFF* */
+struct kgdb_info {
+ int used_malloc;
+ void *called_from;
+ long long entry_tsc;
+ int errcode;
+ int vector;
+ int print_debug_info;
+#ifdef CONFIG_SMP
+ int hold_on_sstep;
+ struct {
+ volatile struct task_struct *task;
+ int pid;
+ int hold;
+ struct pt_regs *regs;
+ } cpus_waiting[MAX_NO_CPUS];
+#endif
+} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1};
+
+/* *INDENT-ON* */
+
+#define used_m kgdb_info.used_malloc
+/*
+ * This is a little area we set aside to contain the stack we
+ * need to build to allow gdb to call functions. We use one
+ * per cpu to avoid locking issues. We will do all this work
+ * with interrupts off so that should take care of the protection
+ * issues.
+ */
+#define LOOKASIDE_SIZE 200 /* should be more than enough */
+#define MALLOC_MAX 200 /* Max malloc size */
+struct {
+ unsigned int esp;
+ int array[LOOKASIDE_SIZE];
+} fn_call_lookaside[MAX_NO_CPUS];
+
+static int trap_cpu;
+static unsigned int OLD_esp;
+
+#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE]
+#define IF_BIT 0x200
+#define TF_BIT 0x100
+
+#define MALLOC_ROUND 8-1
+
+static char malloc_array[MALLOC_MAX];
+IF_SMP(static void to_gdb(const char *mess));
+void *
+malloc(int size)
+{
+
+ if (size <= (MALLOC_MAX - used_m)) {
+ int old_used = used_m;
+ used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND));
+ return &malloc_array[old_used];
+ } else {
+ return NULL;
+ }
+}
+
+/*
+ * I/O dispatch functions...
+ * Based upon kgdboe, either call the ethernet
+ * handler or the serial one..
+ */
+void
+putDebugChar(int c)
+{
+ if (!kgdboe) {
+ tty_putDebugChar(c);
+ } else {
+ eth_putDebugChar(c);
+ }
+}
+
+int
+getDebugChar(void)
+{
+ if (!kgdboe) {
+ return tty_getDebugChar();
+ } else {
+ return eth_getDebugChar();
+ }
+}
+
+void
+flushDebugChar(void)
+{
+ if (!kgdboe) {
+ tty_flushDebugChar();
+ } else {
+ eth_flushDebugChar();
+ }
+}
+
+/*
+ * Gdb calls functions by pushing arguments, including a return address,
+ * on the stack and then adjusting EIP to point to the function. The
+ * whole assumption in GDB is that we are on a different stack than the
+ * one the "user", i.e. the code that hit the break point, is on. This,
+ * of course, is not true in the kernel. Thus various dodges are needed
+ * to do the call without directly messing with EIP (which we can not
+ * change, as it is just a location on the stack and not a register; to
+ * adjust it we would have to move everything below EIP up or down as
+ * needed, and that will not work as we may well have stack-relative
+ * pointers on the stack, such as the pointer to regs, for example).
+
+ * So here is what we do:
+ * We detect gdb attempting to store into the stack area and, instead, store
+ * into the fn_call_lookaside.array at the same relative location as if it
+ * were the area ESP pointed at. We also trap ESP modifications
+ * and use these to adjust fn_call_lookaside.esp. On entry
+ * fn_call_lookaside.esp will be set to point at the last entry in
+ * fn_call_lookaside.array. This allows us to check if it has changed, and
+ * if so, on exit, we add the registers we will use to do the move and a
+ * trap/interrupt return exit sequence. We then adjust the eflags in the
+ * regs array (remember we now have a copy in the fn_call_lookaside.array) to
+ * kill the interrupt bit, AND we change EIP to point at our set-up stub.
+ * As part of the register set-up we preset the registers to point at the
+ * beginning and end of the fn_call_lookaside.array, so all the stub needs to
+ * do is move words from the array to the stack until ESP equals the desired
+ * value, then do the iret. This will then transfer to the desired function
+ * with all the correct registers. Nifty huh?
+ */
+extern asmlinkage void fn_call_stub(void);
+extern asmlinkage void fn_rtn_stub(void);
+/* *INDENT-OFF* */
+__asm__("fn_rtn_stub:\n\t"
+ "movl %eax,%esp\n\t"
+ "fn_call_stub:\n\t"
+ "1:\n\t"
+ "addl $-4,%ebx\n\t"
+ "movl (%ebx), %eax\n\t"
+ "pushl %eax\n\t"
+ "cmpl %esp,%ecx\n\t"
+ "jne 1b\n\t"
+ "popl %eax\n\t"
+ "popl %ebx\n\t"
+ "popl %ecx\n\t"
+ "iret \n\t");
+/* *INDENT-ON* */
+#define gdb_i386vector kgdb_info.vector
+#define gdb_i386errcode kgdb_info.errcode
+#define waiting_cpus kgdb_info.cpus_waiting
+#define remote_debug kgdb_info.print_debug_info
+#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold
+/* gdb locks */
+
+#ifdef CONFIG_SMP
+static int in_kgdb_called;
+static spinlock_t waitlocks[MAX_NO_CPUS] =
+ {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED };
+/*
+ * The following array has the thread pointer of each of the "other"
+ * cpus. We make it global so it can be seen by gdb.
+ */
+volatile int in_kgdb_entry_log[MAX_NO_CPUS];
+volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS];
+/*
+static spinlock_t continuelocks[MAX_NO_CPUS];
+*/
+spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED;
+/* waiters on our spinlock plus us */
+static atomic_t spinlock_waiters = ATOMIC_INIT(1);
+static int spinlock_count = 0;
+static int spinlock_cpu = 0;
+/*
+ * Note we use nested spin locks to account for the case where a break
+ * point is encountered when calling a function by user direction from
+ * kgdb. Also there is the memory exception recursion to account for.
+ * Well, yes, but this lets other cpus through too. Let's add a
+ * cpu id to the lock.
+ */
+#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \
+ spinlock_cpu != smp_processor_id()){\
+ atomic_inc(&spinlock_waiters); \
+ while (! spin_trylock(x)) {\
+ in_kgdb(&regs);\
+ }\
+ atomic_dec(&spinlock_waiters); \
+ spinlock_count = 1; \
+ spinlock_cpu = smp_processor_id(); \
+ }else{ \
+ spinlock_count++; \
+ }
+#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x)
+#else
+unsigned kgdb_spinlock = 0;
+#define KGDB_SPIN_LOCK(x) --*x
+#define KGDB_SPIN_UNLOCK(x) ++*x
+#endif
+
+int
+hex(char ch)
+{
+ if ((ch >= 'a') && (ch <= 'f'))
+ return (ch - 'a' + 10);
+ if ((ch >= '0') && (ch <= '9'))
+ return (ch - '0');
+ if ((ch >= 'A') && (ch <= 'F'))
+ return (ch - 'A' + 10);
+ return (-1);
+}
+
+/* scan for the sequence $<data>#<checksum> */
+void
+getpacket(char *buffer)
+{
+ unsigned char checksum;
+ unsigned char xmitcsum;
+ int i;
+ int count;
+ char ch;
+
+ do {
+ /* wait around for the start character, ignore all other characters */
+ while ((ch = (getDebugChar() & 0x7f)) != '$') ;
+ checksum = 0;
+ xmitcsum = -1;
+
+ count = 0;
+
+ /* now, read until a # or end of buffer is found */
+ while (count < BUFMAX) {
+ ch = getDebugChar() & 0x7f;
+ if (ch == '#')
+ break;
+ checksum = checksum + ch;
+ buffer[count] = ch;
+ count = count + 1;
+ }
+ buffer[count] = 0;
+
+ if (ch == '#') {
+ xmitcsum = hex(getDebugChar() & 0x7f) << 4;
+ xmitcsum += hex(getDebugChar() & 0x7f);
+ if ((remote_debug) && (checksum != xmitcsum)) {
+ printk
+ ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n",
+ checksum, xmitcsum, buffer);
+ }
+
+ if (checksum != xmitcsum)
+ putDebugChar('-'); /* failed checksum */
+ else {
+ putDebugChar('+'); /* successful transfer */
+ /* if a sequence char is present, reply the sequence ID */
+ if (buffer[2] == ':') {
+ putDebugChar(buffer[0]);
+ putDebugChar(buffer[1]);
+ /* remove sequence chars from buffer */
+ count = strlen(buffer);
+ for (i = 3; i <= count; i++)
+ buffer[i - 3] = buffer[i];
+ }
+ }
+ }
+ } while (checksum != xmitcsum);
+
+ if (remote_debug)
+ printk("R:%s\n", buffer);
+ flushDebugChar();
+}
+
+/* send the packet in buffer. */
+
+void
+putpacket(char *buffer)
+{
+ unsigned char checksum;
+ int count;
+ char ch;
+
+ /* $<packet info>#<checksum>. */
+
+ if (!kgdboe) {
+ do {
+ if (remote_debug)
+ printk("T:%s\n", buffer);
+ putDebugChar('$');
+ checksum = 0;
+ count = 0;
+
+ while ((ch = buffer[count])) {
+ putDebugChar(ch);
+ checksum += ch;
+ count += 1;
+ }
+
+ putDebugChar('#');
+ putDebugChar(hexchars[checksum >> 4]);
+ putDebugChar(hexchars[checksum % 16]);
+ flushDebugChar();
+
+ } while ((getDebugChar() & 0x7f) != '+');
+ } else {
+ /*
+ * For udp, we cannot transfer too many bytes at once.
+ * We only transfer MAX_SEND_COUNT bytes each time.
+ */
+
+#define MAX_SEND_COUNT 30
+
+ int send_count = 0, i = 0;
+ char send_buf[MAX_SEND_COUNT];
+
+ do {
+ if (remote_debug)
+ printk("T:%s\n", buffer);
+ putDebugChar('$');
+ checksum = 0;
+ count = 0;
+ send_count = 0;
+ while ((ch = buffer[count])) {
+ if (send_count >= MAX_SEND_COUNT) {
+ for(i = 0; i < MAX_SEND_COUNT; i++) {
+ putDebugChar(send_buf[i]);
+ }
+ flushDebugChar();
+ send_count = 0;
+ } else {
+ send_buf[send_count] = ch;
+ checksum += ch;
+ count ++;
+ send_count++;
+ }
+ }
+ for(i = 0; i < send_count; i++)
+ putDebugChar(send_buf[i]);
+ putDebugChar('#');
+ putDebugChar(hexchars[checksum >> 4]);
+ putDebugChar(hexchars[checksum % 16]);
+ flushDebugChar();
+ } while ((getDebugChar() & 0x7f) != '+');
+ }
+}
+
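To make the packet framing used by getpacket() and putpacket() concrete, here is a stand-alone sketch (hypothetical helper, plain C): the payload "m0,10" sums to 0x12a, so modulo 256 the checksum is 0x2a and the frame is "$m0,10#2a", matching the example in the header comment.

#include <stdio.h>

/* Frame a payload the way putpacket() does: '$', payload, '#', then the
 * modulo-256 sum of the payload bytes as two hex digits. */
static void frame_packet(const char *payload, char *out, size_t outlen)
{
        unsigned char csum = 0;
        size_t i;

        for (i = 0; payload[i]; i++)
                csum += (unsigned char)payload[i];
        snprintf(out, outlen, "$%s#%02x", payload, csum);
}

int main(void)
{
        char frame[64];

        frame_packet("m0,10", frame, sizeof(frame));
        printf("%s\n", frame);                  /* prints $m0,10#2a */
        return 0;
}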
+static char remcomInBuffer[BUFMAX];
+static char remcomOutBuffer[BUFMAX];
+static short error;
+
+void
+debug_error(char *format, char *parm)
+{
+ if (remote_debug)
+ printk(format, parm);
+}
+
+static void
+print_regs(struct pt_regs *regs)
+{
+ printk("EAX=%08lx ", regs->eax);
+ printk("EBX=%08lx ", regs->ebx);
+ printk("ECX=%08lx ", regs->ecx);
+ printk("EDX=%08lx ", regs->edx);
+ printk("\n");
+ printk("ESI=%08lx ", regs->esi);
+ printk("EDI=%08lx ", regs->edi);
+ printk("EBP=%08lx ", regs->ebp);
+ printk("ESP=%08lx ", (long) ®s->esp);
+ printk("\n");
+ printk(" DS=%08x ", regs->xds);
+ printk(" ES=%08x ", regs->xes);
+ printk(" SS=%08x ", __KERNEL_DS);
+ printk(" FL=%08lx ", regs->eflags);
+ printk("\n");
+ printk(" CS=%08x ", regs->xcs);
+ printk(" IP=%08lx ", regs->eip);
+#if 0
+ printk(" FS=%08x ", regs->fs);
+ printk(" GS=%08x ", regs->gs);
+#endif
+ printk("\n");
+
+} /* print_regs */
+
+#define NEW_esp fn_call_lookaside[trap_cpu].esp
+
+static void
+regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs)
+{
+ gdb_regs[_EAX] = regs->eax;
+ gdb_regs[_EBX] = regs->ebx;
+ gdb_regs[_ECX] = regs->ecx;
+ gdb_regs[_EDX] = regs->edx;
+ gdb_regs[_ESI] = regs->esi;
+ gdb_regs[_EDI] = regs->edi;
+ gdb_regs[_EBP] = regs->ebp;
+ gdb_regs[_DS] = regs->xds;
+ gdb_regs[_ES] = regs->xes;
+ gdb_regs[_PS] = regs->eflags;
+ gdb_regs[_CS] = regs->xcs;
+ gdb_regs[_PC] = regs->eip;
+ /* Note, as we are debugging the kernel, we will always
+ * trap in kernel code; this means no privilege change,
+ * and so the pt_regs structure is not completely valid. In a non
+ * privilege change trap, only EFLAGS, CS and EIP are put on the stack,
+ * SS and ESP are not stacked; this means that the last 2 elements of
+ * pt_regs are not valid (they would normally refer to the user stack).
+ * Also, using regs+1 is no good because you end up with a value that is
+ * 2 longs (8) too high. This used to cause stepping over functions
+ * to fail, so my fix is to use the address of regs->esp, which
+ * should point at the end of the stack frame. Note I have ignored
+ * completely exceptions that cause an error code to be stacked, such
+ * as double fault. Stuart Hughes, Zentropix.
+ * original code: gdb_regs[_ESP] = (int) (regs + 1) ;
+
+ * this is now done on entry and moved to OLD_esp (as well as NEW_esp).
+ */
+ gdb_regs[_ESP] = NEW_esp;
+ gdb_regs[_SS] = __KERNEL_DS;
+ gdb_regs[_FS] = 0xFFFF;
+ gdb_regs[_GS] = 0xFFFF;
+} /* regs_to_gdb_regs */
+
+static void
+gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs)
+{
+ regs->eax = gdb_regs[_EAX];
+ regs->ebx = gdb_regs[_EBX];
+ regs->ecx = gdb_regs[_ECX];
+ regs->edx = gdb_regs[_EDX];
+ regs->esi = gdb_regs[_ESI];
+ regs->edi = gdb_regs[_EDI];
+ regs->ebp = gdb_regs[_EBP];
+ regs->xds = gdb_regs[_DS];
+ regs->xes = gdb_regs[_ES];
+ regs->eflags = gdb_regs[_PS];
+ regs->xcs = gdb_regs[_CS];
+ regs->eip = gdb_regs[_PC];
+ NEW_esp = gdb_regs[_ESP]; /* keep the value */
+#if 0 /* can't change these */
+ regs->esp = gdb_regs[_ESP];
+ regs->xss = gdb_regs[_SS];
+ regs->fs = gdb_regs[_FS];
+ regs->gs = gdb_regs[_GS];
+#endif
+
+} /* gdb_regs_to_regs */
+extern void scheduling_functions_start_here(void);
+extern void scheduling_functions_end_here(void);
+#define first_sched ((unsigned long) scheduling_functions_start_here)
+#define last_sched ((unsigned long) scheduling_functions_end_here)
+
+int thread_list = 0;
+
+void
+get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs)
+{
+ unsigned long stack_page;
+ int count = 0;
+ IF_SMP(int i);
+ if (!p || p == current) {
+ regs_to_gdb_regs(gdb_regs, regs);
+ return;
+ }
+#ifdef CONFIG_SMP
+ for (i = 0; i < MAX_NO_CPUS; i++) {
+ if (p == kgdb_info.cpus_waiting[i].task) {
+ regs_to_gdb_regs(gdb_regs,
+ kgdb_info.cpus_waiting[i].regs);
+ gdb_regs[_ESP] =
+ (int) &kgdb_info.cpus_waiting[i].regs->esp;
+
+ return;
+ }
+ }
+#endif
+ memset(gdb_regs, 0, NUMREGBYTES);
+ gdb_regs[_ESP] = p->thread.esp;
+ gdb_regs[_PC] = p->thread.eip;
+ gdb_regs[_EBP] = *(int *) gdb_regs[_ESP];
+ gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4);
+ gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8);
+
+/*
+ * This code is to give a more informative notion of where a process
+ * is waiting. It is used only when the user asks for a thread info
+ * list. If the user then switches to the thread, they will find the task
+ * is in schedule, but a back trace should show the same info we come
+ * up with. This code was shamelessly purloined from process.c. It was
+ * then enhanced to provide more registers than simply the program
+ * counter.
+ */
+
+ if (!thread_list) {
+ return;
+ }
+
+ if (p->state == TASK_RUNNING)
+ return;
+ stack_page = (unsigned long) p->thread_info;
+ if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > 8188 + stack_page)
+ return;
+ /* include/asm-i386/system.h:switch_to() pushes ebp last. */
+ do {
+ if (gdb_regs[_EBP] < stack_page ||
+ gdb_regs[_EBP] > 8184 + stack_page)
+ return;
+ gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4);
+ gdb_regs[_ESP] = gdb_regs[_EBP] + 8;
+ gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP];
+ if (gdb_regs[_PC] < first_sched || gdb_regs[_PC] >= last_sched)
+ return;
+ } while (count++ < 16);
+ return;
+}
+
+/* Indicate to caller of mem2hex or hex2mem that there has been an
+ error. */
+static volatile int mem_err = 0;
+static volatile int mem_err_expected = 0;
+static volatile int mem_err_cnt = 0;
+static int garbage_loc = -1;
+
+int
+get_char(char *addr)
+{
+ return *addr;
+}
+
+void
+set_char(char *addr, int val, int may_fault)
+{
+ /*
+ * This code traps references to the area mapped to the kernel
+ * stack as given by the regs and, instead, stores to the
+ * fn_call_lookaside[cpu].array
+ */
+ if (may_fault &&
+ (unsigned int) addr < OLD_esp &&
+ ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) {
+ addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr);
+ }
+ *addr = val;
+}
+
+/* convert the memory pointed to by mem into hex, placing result in buf */
+/* return a pointer to the last char put in buf (null) */
+/* If MAY_FAULT is non-zero, then we should set mem_err in response to
+ a fault; if zero treat a fault like any other fault in the stub. */
+char *
+mem2hex(char *mem, char *buf, int count, int may_fault)
+{
+ int i;
+ unsigned char ch;
+
+ if (may_fault) {
+ mem_err_expected = 1;
+ mem_err = 0;
+ }
+ for (i = 0; i < count; i++) {
+ /* printk("%lx = ", mem) ; */
+
+ ch = get_char(mem++);
+
+ /* printk("%02x\n", ch & 0xFF) ; */
+ if (may_fault && mem_err) {
+ if (remote_debug)
+ printk("Mem fault fetching from addr %lx\n",
+ (long) (mem - 1));
+ *buf = 0; /* truncate buffer */
+ return (buf);
+ }
+ *buf++ = hexchars[ch >> 4];
+ *buf++ = hexchars[ch % 16];
+ }
+ *buf = 0;
+ if (may_fault)
+ mem_err_expected = 0;
+ return (buf);
+}
+
+/* convert the hex array pointed to by buf into binary to be placed in mem */
+/* return a pointer to the character AFTER the last byte written */
+/* NOTE: We use the may fault flag to also indicate if the write is to
+ * the registers (0) or "other" memory (!=0)
+ */
+char *
+hex2mem(char *buf, char *mem, int count, int may_fault)
+{
+ int i;
+ unsigned char ch;
+
+ if (may_fault) {
+ mem_err_expected = 1;
+ mem_err = 0;
+ }
+ for (i = 0; i < count; i++) {
+ ch = hex(*buf++) << 4;
+ ch = ch + hex(*buf++);
+ set_char(mem++, ch, may_fault);
+
+ if (may_fault && mem_err) {
+ if (remote_debug)
+ printk("Mem fault storing to addr %lx\n",
+ (long) (mem - 1));
+ return (mem);
+ }
+ }
+ if (may_fault)
+ mem_err_expected = 0;
+ return (mem);
+}
+
+/**********************************************/
+/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */
+/* RETURN NUMBER OF CHARS PROCESSED */
+/**********************************************/
+int
+hexToInt(char **ptr, int *intValue)
+{
+ int numChars = 0;
+ int hexValue;
+
+ *intValue = 0;
+
+ while (**ptr) {
+ hexValue = hex(**ptr);
+ if (hexValue >= 0) {
+ *intValue = (*intValue << 4) | hexValue;
+ numChars++;
+ } else
+ break;
+
+ (*ptr)++;
+ }
+
+ return (numChars);
+}
+
+#define stubhex(h) hex(h)
+#ifdef old_thread_list
+
+static int
+stub_unpack_int(char *buff, int fieldlength)
+{
+ int nibble;
+ int retval = 0;
+
+ while (fieldlength) {
+ nibble = stubhex(*buff++);
+ retval |= nibble;
+ fieldlength--;
+ if (fieldlength)
+ retval = retval << 4;
+ }
+ return retval;
+}
+#endif
+static char *
+pack_hex_byte(char *pkt, int byte)
+{
+ *pkt++ = hexchars[(byte >> 4) & 0xf];
+ *pkt++ = hexchars[(byte & 0xf)];
+ return pkt;
+}
+
+#define BUF_THREAD_ID_SIZE 16
+
+static char *
+pack_threadid(char *pkt, threadref * id)
+{
+ char *limit;
+ unsigned char *altid;
+
+ altid = (unsigned char *) id;
+ limit = pkt + BUF_THREAD_ID_SIZE;
+ while (pkt < limit)
+ pkt = pack_hex_byte(pkt, *altid++);
+ return pkt;
+}
+
+#ifdef old_thread_list
+static char *
+unpack_byte(char *buf, int *value)
+{
+ *value = stub_unpack_int(buf, 2);
+ return buf + 2;
+}
+
+static char *
+unpack_threadid(char *inbuf, threadref * id)
+{
+ char *altref;
+ char *limit = inbuf + BUF_THREAD_ID_SIZE;
+ int x, y;
+
+ altref = (char *) id;
+
+ while (inbuf < limit) {
+ x = stubhex(*inbuf++);
+ y = stubhex(*inbuf++);
+ *altref++ = (x << 4) | y;
+ }
+ return inbuf;
+}
+#endif
+void
+int_to_threadref(threadref * id, int value)
+{
+ unsigned char *scan;
+
+ scan = (unsigned char *) id;
+ {
+ int i = 4;
+ while (i--)
+ *scan++ = 0;
+ }
+ *scan++ = (value >> 24) & 0xff;
+ *scan++ = (value >> 16) & 0xff;
+ *scan++ = (value >> 8) & 0xff;
+ *scan++ = (value & 0xff);
+}
+int
+int_to_hex_v(unsigned char * id, int value)
+{
+ unsigned char *start = id;
+ int shift;
+ int ch;
+
+ for (shift = 28; shift >= 0; shift -= 4) {
+ if ((ch = (value >> shift) & 0xf) || (id != start)) {
+ *id = hexchars[ch];
+ id++;
+ }
+ }
+ if (id == start)
+ *id++ = '0';
+ return id - start;
+}
+#ifdef old_thread_list
+
+static int
+threadref_to_int(threadref * ref)
+{
+ int i, value = 0;
+ unsigned char *scan;
+
+ scan = (char *) ref;
+ scan += 4;
+ i = 4;
+ while (i-- > 0)
+ value = (value << 8) | ((*scan++) & 0xff);
+ return value;
+}
+#endif
+static int
+cmp_str(char *s1, char *s2, int count)
+{
+ while (count--) {
+ if (*s1++ != *s2++)
+ return 0;
+ }
+ return 1;
+}
+
+#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */
+extern struct task_struct *kgdb_get_idle(int cpu);
+#define idle_task(cpu) kgdb_get_idle(cpu)
+#else
+#define idle_task(cpu) init_tasks[cpu]
+#endif
+
+extern int kgdb_pid_init_done;
+
+struct task_struct *
+getthread(int pid)
+{
+ struct task_struct *thread;
+ if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) {
+
+ return idle_task(pid - PID_MAX);
+ } else {
+ /*
+ * find_task_by_pid is relatively safe all the time
+ * Other pid functions require lock downs which imply
+ * that we may be interrupting them (as we get here
+ * in the middle of most any lock down).
+ * Still we don't want to call until the table exists!
+ */
+ if (kgdb_pid_init_done){
+ thread = find_task_by_pid(pid);
+ if (thread) {
+ return thread;
+ }
+ }
+ }
+ return NULL;
+}
+/* *INDENT-OFF* */
+struct hw_breakpoint {
+ unsigned enabled;
+ unsigned type;
+ unsigned len;
+ unsigned addr;
+} breakinfo[4] = { {enabled:0},
+ {enabled:0},
+ {enabled:0},
+ {enabled:0}};
+/* *INDENT-ON* */
+unsigned hw_breakpoint_status;
+void
+correct_hw_break(void)
+{
+ int breakno;
+ int correctit;
+ int breakbit;
+ unsigned dr7;
+
+ asm volatile ("movl %%db7, %0\n":"=r" (dr7)
+ :);
+ /* *INDENT-OFF* */
+ do {
+ unsigned addr0, addr1, addr2, addr3;
+ asm volatile ("movl %%db0, %0\n"
+ "movl %%db1, %1\n"
+ "movl %%db2, %2\n"
+ "movl %%db3, %3\n"
+ :"=r" (addr0), "=r"(addr1),
+ "=r"(addr2), "=r"(addr3)
+ :);
+ } while (0);
+ /* *INDENT-ON* */
+ correctit = 0;
+	for (breakno = 0; breakno < 4; breakno++) {
+ breakbit = 2 << (breakno << 1);
+ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) {
+ correctit = 1;
+ dr7 |= breakbit;
+ dr7 &= ~(0xf0000 << (breakno << 2));
+ dr7 |= (((breakinfo[breakno].len << 2) |
+ breakinfo[breakno].type) << 16) <<
+ (breakno << 2);
+ switch (breakno) {
+ case 0:
+ asm volatile ("movl %0, %%dr0\n"::"r"
+ (breakinfo[breakno].addr));
+ break;
+
+ case 1:
+ asm volatile ("movl %0, %%dr1\n"::"r"
+ (breakinfo[breakno].addr));
+ break;
+
+ case 2:
+ asm volatile ("movl %0, %%dr2\n"::"r"
+ (breakinfo[breakno].addr));
+ break;
+
+ case 3:
+ asm volatile ("movl %0, %%dr3\n"::"r"
+ (breakinfo[breakno].addr));
+ break;
+ }
+ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) {
+ correctit = 1;
+ dr7 &= ~breakbit;
+ dr7 &= ~(0xf0000 << (breakno << 2));
+ }
+ }
+ if (correctit) {
+ asm volatile ("movl %0, %%db7\n"::"r" (dr7));
+ }
+}
+
+int
+remove_hw_break(unsigned breakno)
+{
+ if (!breakinfo[breakno].enabled) {
+ return -1;
+ }
+ breakinfo[breakno].enabled = 0;
+ return 0;
+}
+
+int
+set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr)
+{
+ if (breakinfo[breakno].enabled) {
+ return -1;
+ }
+ breakinfo[breakno].enabled = 1;
+ breakinfo[breakno].type = type;
+ breakinfo[breakno].len = len;
+ breakinfo[breakno].addr = addr;
+ return 0;
+}
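
For orientation, a sketch of the dr7 layout that correct_hw_break() builds up
above (dr7_bits() is a hypothetical helper, not part of the stub): each of the
four breakpoints gets a global-enable bit in the low word and a four-bit
length/type field in the high word.

	static unsigned int dr7_bits(unsigned breakno, unsigned type, unsigned len)
	{
		unsigned int enable = 2 << (breakno << 1);	/* G0..G3 enable bit */
		unsigned int control = ((len << 2) | type) << (16 + (breakno << 2));

		return enable | control;
	}
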
+
+#ifdef CONFIG_SMP
+static int in_kgdb_console = 0;
+
+int
+in_kgdb(struct pt_regs *regs)
+{
+ unsigned flags;
+ int cpu = smp_processor_id();
+ in_kgdb_called = 1;
+ if (!spin_is_locked(&kgdb_spinlock)) {
+ if (in_kgdb_here_log[cpu] || /* we are holding this cpu */
+ in_kgdb_console) { /* or we are doing slow i/o */
+ return 1;
+ }
+ return 0;
+ }
+
+ /* As I see it the only reason not to let all cpus spin on
+ * the same spin_lock is to allow selected ones to proceed.
+ * This would be a good thing, so we leave it this way.
+ * Maybe someday.... Done !
+
+ * in_kgdb() is called from an NMI so we don't pretend
+ * to have any resources, like printk() for example.
+ */
+
+ kgdb_local_irq_save(flags); /* only local here, to avoid hanging */
+ /*
+	 * log arrival of this cpu
+	 * The NMI keeps on ticking.  Protect against recurring more
+	 * than once, and ignore the cpu that has the kgdb lock
+ */
+ in_kgdb_entry_log[cpu]++;
+ in_kgdb_here_log[cpu] = regs;
+ if (cpu == spinlock_cpu || waiting_cpus[cpu].task)
+ goto exit_in_kgdb;
+
+ /*
+	 * To protect its initialization of the spin locks, kgdb takes the
+	 * kgdb spinlock before it gets the wait locks set up.  We wait here
+	 * for the wait lock to be taken.  What if the kgdb lock goes away
+	 * first?  Well, that could be a slow exit sequence where the wait
+	 * lock is removed prior to the kgdb lock, so if kgdb gets unlocked,
+	 * we just exit.
+ */
+
+ while (spin_is_locked(&kgdb_spinlock) &&
+ !spin_is_locked(waitlocks + cpu)) ;
+ if (!spin_is_locked(&kgdb_spinlock))
+ goto exit_in_kgdb;
+
+ waiting_cpus[cpu].task = current;
+ waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu);
+ waiting_cpus[cpu].regs = regs;
+
+ spin_unlock_wait(waitlocks + cpu);
+
+ /*
+ * log departure of this cpu
+ */
+ waiting_cpus[cpu].task = 0;
+ waiting_cpus[cpu].pid = 0;
+ waiting_cpus[cpu].regs = 0;
+ correct_hw_break();
+ exit_in_kgdb:
+ in_kgdb_here_log[cpu] = 0;
+ kgdb_local_irq_restore(flags);
+ return 1;
+ /*
+ spin_unlock(continuelocks + smp_processor_id());
+ */
+}
+
+void
+smp__in_kgdb(struct pt_regs regs)
+{
+ ack_APIC_irq();
+	in_kgdb(&regs);
+}
+#else
+int
+in_kgdb(struct pt_regs *regs)
+{
+ return (kgdb_spinlock);
+}
+#endif
+
+void
+printexceptioninfo(int exceptionNo, int errorcode, char *buffer)
+{
+ unsigned dr6;
+ int i;
+ switch (exceptionNo) {
+ case 1: /* debug exception */
+ break;
+ case 3: /* breakpoint */
+ sprintf(buffer, "Software breakpoint");
+ return;
+ default:
+ sprintf(buffer, "Details not available");
+ return;
+ }
+ asm volatile ("movl %%db6, %0\n":"=r" (dr6)
+ :);
+ if (dr6 & 0x4000) {
+ sprintf(buffer, "Single step");
+ return;
+ }
+ for (i = 0; i < 4; ++i) {
+ if (dr6 & (1 << i)) {
+ sprintf(buffer, "Hardware breakpoint %d", i);
+ return;
+ }
+ }
+ sprintf(buffer, "Unknown trap");
+ return;
+}
+
+/*
+ * This function does all command processing for interfacing to gdb.
+ *
+ * NOTE: The INT nn instruction leaves the state of the interrupt
+ * enable flag UNCHANGED. That means that when this routine
+ * is entered via a breakpoint (INT 3) instruction from code
+ * that has interrupts enabled, then interrupts will STILL BE
+ * enabled when this routine is entered. The first thing that
+ * we do here is disable interrupts so as to prevent recursive
+ * entries and bothersome serial interrupts while we are
+ * trying to run the serial port in polled mode.
+ *
+ * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so
+ * it is always necessary to do a restore_flags before returning
+ * so as to let go of that lock.
+ */
+int
+kgdb_handle_exception(int exceptionVector,
+ int signo, int err_code, struct pt_regs *linux_regs)
+{
+ struct task_struct *usethread = NULL;
+ struct task_struct *thread_list_start = 0, *thread = NULL;
+ int addr, length;
+ int breakno, breaktype;
+ char *ptr;
+ int newPC;
+ threadref thref;
+ int threadid;
+ int thread_min = PID_MAX + MAX_NO_CPUS;
+#ifdef old_thread_list
+ int maxthreads;
+#endif
+ int nothreads;
+ unsigned long flags;
+ int gdb_regs[NUMREGBYTES / 4];
+ int dr6;
+ IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */
+#define NO_NMI 1
+#define NO_SYNC 2
+#define regs (*linux_regs)
+#define NUMREGS NUMREGBYTES/4
+ /*
+ * If the entry is not from the kernel then return to the Linux
+ * trap handler and let it process the interrupt normally.
+ */
+ if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) {
+ printk("ignoring non-kernel exception\n");
+		print_regs(&regs);
+ return (0);
+ }
+ /*
+ * If we're using eth mode, set the 'mode' in the netdevice.
+ */
+
+ if (kgdboe)
+ netpoll_set_trap(1);
+
+ kgdb_local_irq_save(flags);
+
+ /* Get kgdb spinlock */
+
+ KGDB_SPIN_LOCK(&kgdb_spinlock);
+ rdtscll(kgdb_info.entry_tsc);
+ /*
+	 * We depend on this spinlock and the NMI watchdog to control the
+ * other cpus. They will arrive at "in_kgdb()" as a result of the
+ * NMI and will wait there for the following spin locks to be
+ * released.
+ */
+#ifdef CONFIG_SMP
+
+#if 0
+ if (cpu_callout_map & ~MAX_CPU_MASK) {
+ printk("kgdb : too many cpus, possibly not mapped"
+ " in contiguous space, change MAX_NO_CPUS"
+ " in kgdb_stub and make new kernel.\n"
+ " cpu_callout_map is %lx\n", cpu_callout_map);
+ goto exit_just_unlock;
+ }
+#endif
+ if (spinlock_count == 1) {
+ int time = 0, end_time, dum = 0;
+ int i;
+ int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0)
+ };
+ if (remote_debug) {
+ printk("kgdb : cpu %d entry, syncing others\n",
+ smp_processor_id());
+ }
+ for (i = 0; i < MAX_NO_CPUS; i++) {
+ /*
+ * Use trylock as we may already hold the lock if
+ * we are holding the cpu. Net result is all
+ * locked.
+ */
+ spin_trylock(&waitlocks[i]);
+ }
+ for (i = 0; i < MAX_NO_CPUS; i++)
+ cpu_logged_in[i] = 0;
+ /*
+		 * Wait for their arrival.  We know the watchdog is active if
+ * in_kgdb() has ever been called, as it is always called on a
+ * watchdog tick.
+ */
+ rdtsc(dum, time);
+ end_time = time + 2; /* Note: we use the High order bits! */
+ i = 1;
+ if (num_online_cpus() > 1) {
+ int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()];
+ smp_send_nmi_allbutself();
+
+ while (i < num_online_cpus() && time != end_time) {
+ int j;
+ for (j = 0; j < MAX_NO_CPUS; j++) {
+ if (waiting_cpus[j].task &&
+ waiting_cpus[j].task != NOCPU &&
+ !cpu_logged_in[j]) {
+ i++;
+ cpu_logged_in[j] = 1;
+ if (remote_debug) {
+ printk
+ ("kgdb : cpu %d arrived at kgdb\n",
+ j);
+ }
+ break;
+ } else if (!waiting_cpus[j].task &&
+ !cpu_online(j)) {
+ waiting_cpus[j].task = NOCPU;
+ cpu_logged_in[j] = 1;
+ waiting_cpus[j].hold = 1;
+ break;
+ }
+ if (!waiting_cpus[j].task &&
+ in_kgdb_here_log[j]) {
+
+ int wait = 100000;
+ while (wait--) ;
+ if (!waiting_cpus[j].task &&
+ in_kgdb_here_log[j]) {
+ printk
+ ("kgdb : cpu %d stall"
+ " in in_kgdb\n",
+ j);
+ i++;
+ cpu_logged_in[j] = 1;
+ waiting_cpus[j].task =
+ (struct task_struct
+ *) 1;
+ }
+ }
+ }
+
+ if (in_kgdb_entry_log[smp_processor_id()] >
+ (me_in_kgdb + 10)) {
+ break;
+ }
+
+ rdtsc(dum, time);
+ }
+ if (i < num_online_cpus()) {
+ printk
+ ("kgdb : time out, proceeding without sync\n");
+#if 0
+ printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n",
+ waiting_cpus[0].task != 0,
+ waiting_cpus[1].task != 0);
+ printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n",
+ cpu_logged_in[0], cpu_logged_in[1]);
+ printk
+ ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n",
+ in_kgdb_here_log[0] != 0,
+ in_kgdb_here_log[1] != 0);
+#endif
+ entry_state = NO_SYNC;
+ } else {
+#if 0
+ int ent =
+ in_kgdb_entry_log[smp_processor_id()] -
+ me_in_kgdb;
+ printk("kgdb : sync after %d entries\n", ent);
+#endif
+ }
+ } else {
+ if (remote_debug) {
+ printk
+ ("kgdb : %d cpus, but watchdog not active\n"
+ "proceeding without locking down other cpus\n",
+ num_online_cpus());
+ entry_state = NO_NMI;
+ }
+ }
+ }
+#endif
+
+ if (remote_debug) {
+ unsigned long *lp = (unsigned long *) &linux_regs;
+
+ printk("handle_exception(exceptionVector=%d, "
+ "signo=%d, err_code=%d, linux_regs=%p)\n",
+ exceptionVector, signo, err_code, linux_regs);
+ if (debug_regs) {
+			print_regs(&regs);
+ printk("Stk: %8lx %8lx %8lx %8lx"
+ " %8lx %8lx %8lx %8lx\n",
+ lp[0], lp[1], lp[2], lp[3],
+ lp[4], lp[5], lp[6], lp[7]);
+ printk(" %8lx %8lx %8lx %8lx"
+ " %8lx %8lx %8lx %8lx\n",
+ lp[8], lp[9], lp[10], lp[11],
+ lp[12], lp[13], lp[14], lp[15]);
+ printk(" %8lx %8lx %8lx %8lx "
+ "%8lx %8lx %8lx %8lx\n",
+ lp[16], lp[17], lp[18], lp[19],
+ lp[20], lp[21], lp[22], lp[23]);
+ printk(" %8lx %8lx %8lx %8lx "
+ "%8lx %8lx %8lx %8lx\n",
+ lp[24], lp[25], lp[26], lp[27],
+ lp[28], lp[29], lp[30], lp[31]);
+ }
+ }
+
+ /* Disable hardware debugging while we are in kgdb */
+ /* Get the debug register status register */
+/* *INDENT-OFF* */
+ __asm__("movl %0,%%db7"
+ : /* no output */
+ :"r"(0));
+
+ asm volatile ("movl %%db6, %0\n"
+ :"=r" (hw_breakpoint_status)
+ :);
+
+/* *INDENT-ON* */
+ switch (exceptionVector) {
+ case 0: /* divide error */
+ case 1: /* debug exception */
+ case 2: /* NMI */
+ case 3: /* breakpoint */
+ case 4: /* overflow */
+ case 5: /* bounds check */
+ case 6: /* invalid opcode */
+ case 7: /* device not available */
+ case 8: /* double fault (errcode) */
+ case 10: /* invalid TSS (errcode) */
+ case 12: /* stack fault (errcode) */
+ case 16: /* floating point error */
+ case 17: /* alignment check (errcode) */
+ default: /* any undocumented */
+ break;
+ case 11: /* segment not present (errcode) */
+ case 13: /* general protection (errcode) */
+ case 14: /* page fault (special errcode) */
+ case 19: /* cache flush denied */
+ if (mem_err_expected) {
+ /*
+			 * This fault occurred because of the
+			 * get_char or set_char routines.  These
+			 * two routines use either eax or edx to
+ * indirectly reference the location in
+ * memory that they are working with.
+ * For a page fault, when we return the
+ * instruction will be retried, so we
+ * have to make sure that these
+ * registers point to valid memory.
+ */
+ mem_err = 1; /* set mem error flag */
+ mem_err_expected = 0;
+ mem_err_cnt++; /* helps in debugging */
+ /* make valid address */
+ regs.eax = (long) &garbage_loc;
+ /* make valid address */
+ regs.edx = (long) &garbage_loc;
+ if (remote_debug)
+ printk("Return after memory error: "
+ "mem_err_cnt=%d\n", mem_err_cnt);
+ if (debug_regs)
+				print_regs(&regs);
+ goto exit_kgdb;
+ }
+ break;
+ }
+ if (remote_debug)
+ printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id());
+
+ gdb_i386vector = exceptionVector;
+ gdb_i386errcode = err_code;
+ kgdb_info.called_from = __builtin_return_address(0);
+#ifdef CONFIG_SMP
+ /*
+	 * OK, we can now communicate; let's tell gdb about the sync,
+	 * but only if we had a problem.
+ */
+ switch (entry_state) {
+ case NO_NMI:
+ to_gdb("NMI not active, other cpus not stopped\n");
+ break;
+ case NO_SYNC:
+ to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n");
+ default:;
+ }
+
+#endif
+/*
+ * Set up the gdb function call area.
+ */
+ trap_cpu = smp_processor_id();
+ OLD_esp = NEW_esp = (int) (&linux_regs->esp);
+
+ IF_SMP(once_again:)
+ /* reply to host that an exception has occurred */
+ remcomOutBuffer[0] = 'S';
+ remcomOutBuffer[1] = hexchars[signo >> 4];
+ remcomOutBuffer[2] = hexchars[signo % 16];
+ remcomOutBuffer[3] = 0;
+
+ putpacket(remcomOutBuffer);
+
+ while (1 == 1) {
+ error = 0;
+ remcomOutBuffer[0] = 0;
+ getpacket(remcomInBuffer);
+ switch (remcomInBuffer[0]) {
+ case '?':
+ remcomOutBuffer[0] = 'S';
+ remcomOutBuffer[1] = hexchars[signo >> 4];
+ remcomOutBuffer[2] = hexchars[signo % 16];
+ remcomOutBuffer[3] = 0;
+ break;
+ case 'd':
+ remote_debug = !(remote_debug); /* toggle debug flag */
+ printk("Remote debug %s\n",
+ remote_debug ? "on" : "off");
+ break;
+ case 'g': /* return the value of the CPU registers */
+			get_gdb_regs(usethread, &regs, gdb_regs);
+ mem2hex((char *) gdb_regs,
+ remcomOutBuffer, NUMREGBYTES, 0);
+ break;
+ case 'G': /* set the value of the CPU registers - return OK */
+ hex2mem(&remcomInBuffer[1],
+ (char *) gdb_regs, NUMREGBYTES, 0);
+ if (!usethread || usethread == current) {
+				gdb_regs_to_regs(gdb_regs, &regs);
+ strcpy(remcomOutBuffer, "OK");
+ } else {
+ strcpy(remcomOutBuffer, "E00");
+ }
+ break;
+
+ case 'P':{ /* set the value of a single CPU register -
+ return OK */
+ /*
+				 * For some reason, gdb wants to talk about pseudo
+				 * registers (greater than 15).  These may have
+				 * meaning for ptrace, but for us it is safe to
+				 * ignore them.  We do this by dumping them into
+				 * _GS, which we also ignore but do have memory for.
+ */
+ int regno;
+
+ ptr = &remcomInBuffer[1];
+				regs_to_gdb_regs(gdb_regs, &regs);
+ if ((!usethread || usethread == current) &&
+ hexToInt(&ptr, ®no) &&
+ *ptr++ == '=' && (regno >= 0)) {
+ regno =
+ (regno >= NUMREGS ? _GS : regno);
+ hex2mem(ptr, (char *) &gdb_regs[regno],
+ 4, 0);
+					gdb_regs_to_regs(gdb_regs, &regs);
+ strcpy(remcomOutBuffer, "OK");
+ break;
+ }
+ strcpy(remcomOutBuffer, "E01");
+ break;
+ }
+
+ /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */
+ case 'm':
+ /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */
+ ptr = &remcomInBuffer[1];
+ if (hexToInt(&ptr, &addr) &&
+ (*(ptr++) == ',') && (hexToInt(&ptr, &length))) {
+ ptr = 0;
+ /*
+ * hex doubles the byte count
+ */
+ if (length > (BUFMAX / 2))
+ length = BUFMAX / 2;
+ mem2hex((char *) addr,
+ remcomOutBuffer, length, 1);
+ if (mem_err) {
+ strcpy(remcomOutBuffer, "E03");
+ debug_error("memory fault\n", NULL);
+ }
+ }
+
+ if (ptr) {
+ strcpy(remcomOutBuffer, "E01");
+ debug_error
+ ("malformed read memory command: %s\n",
+ remcomInBuffer);
+ }
+ break;
+
+ /* MAA..AA,LLLL:
+ Write LLLL bytes at address AA.AA return OK */
+ case 'M':
+ /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */
+ ptr = &remcomInBuffer[1];
+ if (hexToInt(&ptr, &addr) &&
+ (*(ptr++) == ',') &&
+ (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) {
+ hex2mem(ptr, (char *) addr, length, 1);
+
+ if (mem_err) {
+ strcpy(remcomOutBuffer, "E03");
+ debug_error("memory fault\n", NULL);
+ } else {
+ strcpy(remcomOutBuffer, "OK");
+ }
+
+ ptr = 0;
+ }
+ if (ptr) {
+ strcpy(remcomOutBuffer, "E02");
+ debug_error
+ ("malformed write memory command: %s\n",
+ remcomInBuffer);
+ }
+ break;
+ case 'S':
+ remcomInBuffer[0] = 's';
+ case 'C':
+ /* Csig;AA..AA where ;AA..AA is optional
+ * continue with signal
+			 * Since signals are meaningless to us, delete that
+ * part and then fall into the 'c' code.
+ */
+ ptr = &remcomInBuffer[1];
+ length = 2;
+ while (*ptr && *ptr != ';') {
+ length++;
+ ptr++;
+ }
+ if (*ptr) {
+ do {
+ ptr++;
+ *(ptr - length++) = *ptr;
+ } while (*ptr);
+ } else {
+ remcomInBuffer[1] = 0;
+ }
+
+ /* cAA..AA Continue at address AA..AA(optional) */
+ /* sAA..AA Step one instruction from AA..AA(optional) */
+ /* D detach, reply OK and then continue */
+ case 'c':
+ case 's':
+ case 'D':
+
+ /* try to read optional parameter,
+ pc unchanged if no parm */
+ ptr = &remcomInBuffer[1];
+ if (hexToInt(&ptr, &addr)) {
+ if (remote_debug)
+ printk("Changing EIP to 0x%x\n", addr);
+
+ regs.eip = addr;
+ }
+
+ newPC = regs.eip;
+
+ /* clear the trace bit */
+ regs.eflags &= 0xfffffeff;
+
+ /* set the trace bit if we're stepping */
+ if (remcomInBuffer[0] == 's')
+ regs.eflags |= 0x100;
+
+ /* detach is a friendly version of continue. Note that
+			   debugging is still enabled (e.g. hit control-C)
+ */
+ if (remcomInBuffer[0] == 'D') {
+ strcpy(remcomOutBuffer, "OK");
+ putpacket(remcomOutBuffer);
+ }
+
+ if (remote_debug) {
+ printk("Resuming execution\n");
+				print_regs(&regs);
+ }
+ asm volatile ("movl %%db6, %0\n":"=r" (dr6)
+ :);
+ if (!(dr6 & 0x4000)) {
+ for (breakno = 0; breakno < 4; ++breakno) {
+ if (dr6 & (1 << breakno) &&
+ (breakinfo[breakno].type == 0)) {
+ /* Set restore flag */
+ regs.eflags |= 0x10000;
+ break;
+ }
+ }
+ }
+
+ if (kgdboe)
+ netpoll_set_trap(0);
+
+ correct_hw_break();
+ asm volatile ("movl %0, %%db6\n"::"r" (0));
+ goto exit_kgdb;
+
+ /* kill the program */
+ case 'k': /* do nothing */
+ break;
+
+ /* query */
+ case 'q':
+ nothreads = 0;
+ switch (remcomInBuffer[1]) {
+ case 'f':
+ threadid = 1;
+ thread_list = 2;
+ thread_list_start = (usethread ? : current);
+ case 's':
+ if (!cmp_str(&remcomInBuffer[2],
+ "ThreadInfo", 10))
+ break;
+
+ remcomOutBuffer[nothreads++] = 'm';
+ for (; threadid < PID_MAX + MAX_NO_CPUS;
+ threadid++) {
+ thread = getthread(threadid);
+ if (thread) {
+ nothreads += int_to_hex_v(
+ &remcomOutBuffer[
+ nothreads],
+ threadid);
+ if (thread_min > threadid)
+ thread_min = threadid;
+ remcomOutBuffer[
+ nothreads] = ',';
+ nothreads++;
+ if (nothreads > BUFMAX - 10)
+ break;
+ }
+ }
+ if (remcomOutBuffer[nothreads - 1] == 'm') {
+ remcomOutBuffer[nothreads - 1] = 'l';
+ } else {
+ nothreads--;
+ }
+ remcomOutBuffer[nothreads] = 0;
+ break;
+
+#ifdef old_thread_list /* Old thread info request */
+ case 'L':
+ /* List threads */
+ thread_list = 2;
+ thread_list_start = (usethread ? : current);
+ unpack_byte(remcomInBuffer + 3, &maxthreads);
+ unpack_threadid(remcomInBuffer + 5, &thref);
+ do {
+ int buf_thread_limit =
+ (BUFMAX - 22) / BUF_THREAD_ID_SIZE;
+ if (maxthreads > buf_thread_limit) {
+ maxthreads = buf_thread_limit;
+ }
+ } while (0);
+ remcomOutBuffer[0] = 'q';
+ remcomOutBuffer[1] = 'M';
+ remcomOutBuffer[4] = '0';
+ pack_threadid(remcomOutBuffer + 5, &thref);
+
+ threadid = threadref_to_int(&thref);
+ for (nothreads = 0;
+ nothreads < maxthreads &&
+ threadid < PID_MAX + MAX_NO_CPUS;
+ threadid++) {
+ thread = getthread(threadid);
+ if (thread) {
+ int_to_threadref(&thref,
+ threadid);
+ pack_threadid(remcomOutBuffer +
+ 21 +
+ nothreads * 16,
+ &thref);
+ nothreads++;
+ if (thread_min > threadid)
+ thread_min = threadid;
+ }
+ }
+
+ if (threadid == PID_MAX + MAX_NO_CPUS) {
+ remcomOutBuffer[4] = '1';
+ }
+ pack_hex_byte(remcomOutBuffer + 2, nothreads);
+ remcomOutBuffer[21 + nothreads * 16] = '\0';
+ break;
+#endif
+ case 'C':
+ /* Current thread id */
+ remcomOutBuffer[0] = 'Q';
+ remcomOutBuffer[1] = 'C';
+ threadid = current->pid;
+ if (!threadid) {
+ /*
+ * idle thread
+ */
+ for (threadid = PID_MAX;
+ threadid < PID_MAX + MAX_NO_CPUS;
+ threadid++) {
+ if (current ==
+ idle_task(threadid -
+ PID_MAX))
+ break;
+ }
+ }
+ int_to_threadref(&thref, threadid);
+ pack_threadid(remcomOutBuffer + 2, &thref);
+ remcomOutBuffer[18] = '\0';
+ break;
+
+ case 'E':
+ /* Print exception info */
+ printexceptioninfo(exceptionVector,
+ err_code, remcomOutBuffer);
+ break;
+ case 'T':{
+ char * nptr;
+ /* Thread extra info */
+ if (!cmp_str(&remcomInBuffer[2],
+ "hreadExtraInfo,", 15)) {
+ break;
+ }
+ ptr = &remcomInBuffer[17];
+ hexToInt(&ptr, &threadid);
+ thread = getthread(threadid);
+ nptr = &thread->comm[0];
+ length = 0;
+ ptr = &remcomOutBuffer[0];
+ do {
+ length++;
+ ptr = pack_hex_byte(ptr, *nptr++);
+ } while (*nptr && length < 16);
+ /*
+				 * We would like that 16 to be the size of
+				 * task_struct.comm, but we don't know the
+				 * syntax for that here.
+ */
+ *ptr = 0;
+ }
+ }
+ break;
+
+ /* task related */
+ case 'H':
+ switch (remcomInBuffer[1]) {
+ case 'g':
+ ptr = &remcomInBuffer[2];
+ hexToInt(&ptr, &threadid);
+ thread = getthread(threadid);
+ if (!thread) {
+ remcomOutBuffer[0] = 'E';
+ remcomOutBuffer[1] = '\0';
+ break;
+ }
+ /*
+			 * Just in case I forget what this is all about:
+			 * the "thread info" command causes gdb to ask for
+			 * a thread list.  It then switches to each thread
+			 * and asks for its registers.  For this (and only
+			 * this) usage, we want to fudge the registers of
+			 * tasks not on the run list (i.e. waiting) to show
+			 * the routine that called schedule().  Also, gdb
+			 * is a minimalist in that, if the current thread
+			 * is the last one, it will not re-read the info
+			 * when done.  This means that in this case we must
+			 * show the real registers.  So here is how we do
+			 * it: on each entry we keep track of the min
+			 * thread in the list (the last one gdb will get
+			 * info for), and we also keep track of the
+			 * starting thread.
+			 * "thread_list" is cleared when switching to the
+			 * min thread if that thread is the starting
+			 * thread; otherwise thread_list is set to 1.  When
+			 * the switch to the starting thread comes, if
+			 * thread_list is 1, clear it, else do nothing.
+ */
+ usethread = thread;
+ if ((thread_list == 1) &&
+ (thread == thread_list_start)) {
+ thread_list = 0;
+ }
+ if (thread_list && (threadid == thread_min)) {
+ if (thread == thread_list_start) {
+ thread_list = 0;
+ } else {
+ thread_list = 1;
+ }
+ }
+			/* fall through */
+ case 'c':
+ remcomOutBuffer[0] = 'O';
+ remcomOutBuffer[1] = 'K';
+ remcomOutBuffer[2] = '\0';
+ break;
+ }
+ break;
+
+ /* Query thread status */
+ case 'T':
+ ptr = &remcomInBuffer[1];
+ hexToInt(&ptr, &threadid);
+ thread = getthread(threadid);
+ if (thread) {
+ remcomOutBuffer[0] = 'O';
+ remcomOutBuffer[1] = 'K';
+ remcomOutBuffer[2] = '\0';
+ if (thread_min > threadid)
+ thread_min = threadid;
+ } else {
+ remcomOutBuffer[0] = 'E';
+ remcomOutBuffer[1] = '\0';
+ }
+ break;
+
+ case 'Y': /* set up a hardware breakpoint */
+ ptr = &remcomInBuffer[1];
+ hexToInt(&ptr, &breakno);
+ ptr++;
+ hexToInt(&ptr, &breaktype);
+ ptr++;
+ hexToInt(&ptr, &length);
+ ptr++;
+ hexToInt(&ptr, &addr);
+ if (set_hw_break(breakno & 0x3,
+ breaktype & 0x3,
+ length & 0x3, addr) == 0) {
+ strcpy(remcomOutBuffer, "OK");
+ } else {
+ strcpy(remcomOutBuffer, "ERROR");
+ }
+ break;
+
+ /* Remove hardware breakpoint */
+ case 'y':
+ ptr = &remcomInBuffer[1];
+ hexToInt(&ptr, &breakno);
+ if (remove_hw_break(breakno & 0x3) == 0) {
+ strcpy(remcomOutBuffer, "OK");
+ } else {
+ strcpy(remcomOutBuffer, "ERROR");
+ }
+ break;
+
+ case 'r': /* reboot */
+ strcpy(remcomOutBuffer, "OK");
+ putpacket(remcomOutBuffer);
+ /*to_gdb("Rebooting\n"); */
+ /* triplefault no return from here */
+ {
+ static long no_idt[2];
+ __asm__ __volatile__("lidt %0"::"m"(no_idt[0]));
+ BREAKPOINT;
+ }
+
+ } /* switch */
+
+ /* reply to the request */
+ putpacket(remcomOutBuffer);
+ } /* while(1==1) */
+ /*
+ * reached by goto only.
+ */
+ exit_kgdb:
+ /*
+ * Here is where we set up to trap a gdb function call. NEW_esp
+ * will be changed if we are trying to do this. We handle both
+	 * adding and subtracting, thus allowing gdb to put grunge on
+ * the stack which it removes later.
+ */
+ if (NEW_esp != OLD_esp) {
+ int *ptr = END_OF_LOOKASIDE;
+ if (NEW_esp < OLD_esp)
+ ptr -= (OLD_esp - NEW_esp) / sizeof (int);
+ *--ptr = linux_regs->eflags;
+ *--ptr = linux_regs->xcs;
+ *--ptr = linux_regs->eip;
+ *--ptr = linux_regs->ecx;
+ *--ptr = linux_regs->ebx;
+ *--ptr = linux_regs->eax;
+ linux_regs->ecx = NEW_esp - (sizeof (int) * 6);
+ linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE;
+ if (NEW_esp < OLD_esp) {
+ linux_regs->eip = (unsigned int) fn_call_stub;
+ } else {
+ linux_regs->eip = (unsigned int) fn_rtn_stub;
+ linux_regs->eax = NEW_esp;
+ }
+ linux_regs->eflags &= ~(IF_BIT | TF_BIT);
+ }
+#ifdef CONFIG_SMP
+ /*
+ * Release gdb wait locks
+ * Sanity check time. Must have at least one cpu to run. Also single
+ * step must not be done if the current cpu is on hold.
+ */
+ if (spinlock_count == 1) {
+ int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep;
+ int cpu_avail = 0;
+ int i;
+
+ for (i = 0; i < MAX_NO_CPUS; i++) {
+ if (!cpu_online(i))
+ break;
+ if (!hold_cpu(i)) {
+ cpu_avail = 1;
+ }
+ }
+ /*
+		 * Early in the bring-up there will be NO cpus online...
+ */
+ if (!cpu_avail && !cpus_empty(cpu_online_map)) {
+ to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n");
+ goto once_again;
+ }
+ if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) {
+ to_gdb
+ ("Current cpu must be unblocked to single step\n");
+ goto once_again;
+ }
+ if (!(ss_hold)) {
+ int i;
+ for (i = 0; i < MAX_NO_CPUS; i++) {
+ if (!hold_cpu(i)) {
+ spin_unlock(&waitlocks[i]);
+ }
+ }
+ } else {
+ spin_unlock(&waitlocks[smp_processor_id()]);
+ }
+ /* Release kgdb spinlock */
+ KGDB_SPIN_UNLOCK(&kgdb_spinlock);
+ /*
+ * If this cpu is on hold, this is where we
+ * do it. Note, the NMI will pull us out of here,
+ * but will return as the above lock is not held.
+ * We will stay here till another cpu releases the lock for us.
+ */
+ spin_unlock_wait(waitlocks + smp_processor_id());
+ kgdb_local_irq_restore(flags);
+ return (0);
+ }
+#if 0
+exit_just_unlock:
+#endif
+#endif
+ /* Release kgdb spinlock */
+ KGDB_SPIN_UNLOCK(&kgdb_spinlock);
+ kgdb_local_irq_restore(flags);
+ return (0);
+}
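
The command loop above deals only with packet payloads; the '$'/'#' framing and
the acknowledgements are handled by getpacket()/putpacket() earlier in this
file.  As a rough sketch of that framing (standard gdb remote serial protocol;
frame_packet() is illustrative only and not part of the stub): a packet is
"$<data>#<cksum>", where <cksum> is the modulo-256 sum of the data bytes written
as two hex digits.

	static int frame_packet(char *out, const char *data)
	{
		unsigned char checksum = 0;
		int n = 0;

		out[n++] = '$';
		while (*data) {
			checksum += (unsigned char) *data;
			out[n++] = *data++;
		}
		out[n++] = '#';
		out[n++] = hexchars[checksum >> 4];
		out[n++] = hexchars[checksum & 0xf];
		out[n] = '\0';
		return n;		/* e.g. "OK" becomes "$OK#9a" */
	}
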
+
+/* This function is used to set up exception handlers for tracing and
+ * breakpoints.
+ * This function is not needed, as the above line does all that is needed.
+ * We leave it for backward compatibility...
+ */
+void
+set_debug_traps(void)
+{
+ /*
+ * linux_debug_hook is defined in traps.c. We store a pointer
+ * to our own exception handler into it.
+
+	 * But really, folks, ever hear of labeled common, an old Fortran
+	 * concept?  Lots of folks can reference it and it is defined if
+ * anyone does. Only one can initialize it at link time. We do
+ * this with the hook. See the statement above. No need for any
+ * executable code and it is ready as soon as the kernel is
+ * loaded. Very desirable in kernel debugging.
+
+ linux_debug_hook = handle_exception ;
+ */
+
+ /* In case GDB is started before us, ack any packets (presumably
+ "$?#xx") sitting there.
+ putDebugChar ('+');
+
+ initialized = 1;
+ */
+}
+
+/* This function will generate a breakpoint exception. It is used at the
+ beginning of a program to sync up with a debugger and can be used
+ otherwise as a quick means to stop program execution and "break" into
+ the debugger. */
+/* But really, just use the BREAKPOINT macro. We will handle the int stuff
+ */
+
+#ifdef later
+/*
+ * possibly we should not go thru the traps.c code at all? Someday.
+ */
+void
+do_kgdb_int3(struct pt_regs *regs, long error_code)
+{
+ kgdb_handle_exception(3, 5, error_code, regs);
+ return;
+}
+#endif
+#undef regs
+#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS
+asmlinkage void
+bad_sys_call_exit(int stuff)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stuff;
+ printk("Sys call %d return with %x preempt_count\n",
+ (int) regs->orig_eax, preempt_count());
+}
+#endif
+#ifdef CONFIG_STACK_OVERFLOW_TEST
+#include
+asmlinkage void
+stack_overflow(void)
+{
+#ifdef BREAKPOINT
+ BREAKPOINT;
+#else
+ printk("Kernel stack overflow, looping forever\n");
+#endif
+ while (1) {
+ }
+}
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE)
+char gdbconbuf[BUFMAX];
+
+static void
+kgdb_gdb_message(const char *s, unsigned count)
+{
+ int i;
+ int wcount;
+ char *bufptr;
+ /*
+	 * This takes care of NMIs while spinning out chars to gdb
+ */
+ IF_SMP(in_kgdb_console = 1);
+ gdbconbuf[0] = 'O';
+ bufptr = gdbconbuf + 1;
+ while (count > 0) {
+ if ((count << 1) > (BUFMAX - 2)) {
+ wcount = (BUFMAX - 2) >> 1;
+ } else {
+ wcount = count;
+ }
+ count -= wcount;
+ for (i = 0; i < wcount; i++) {
+ bufptr = pack_hex_byte(bufptr, s[i]);
+ }
+ *bufptr = '\0';
+ s += wcount;
+
+ putpacket(gdbconbuf);
+
+ }
+ IF_SMP(in_kgdb_console = 0);
+}
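
A sketch of the 'O' (console output) packet payload built above, for a string
known to fit in one packet (console_packet() is hypothetical and simply mirrors
the loop in kgdb_gdb_message()): every byte is hex-encoded, which is why the
chunking above caps each packet at (BUFMAX - 2) / 2 characters.

	static void console_packet(char *pkt, const char *s)
	{
		*pkt++ = 'O';
		while (*s)
			pkt = pack_hex_byte(pkt, *s++);
		*pkt = '\0';		/* e.g. "hi\n" becomes "O68690a" */
	}
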
+#endif
+#ifdef CONFIG_SMP
+static void
+to_gdb(const char *s)
+{
+ int count = 0;
+ while (s[count] && (count++ < BUFMAX)) ;
+ kgdb_gdb_message(s, count);
+}
+#endif
+#ifdef CONFIG_KGDB_CONSOLE
+#include
+#include
+#include
+#include
+#include
+
+void
+kgdb_console_write(struct console *co, const char *s, unsigned count)
+{
+
+ if (gdb_i386vector == -1) {
+ /*
+ * We have not yet talked to gdb. What to do...
+		 * Let's break; on continue we can do the write.
+		 * But first tell him what's up.  Uh, well, no can do,
+		 * as this IS the console.  Oh well...
+		 * We do need to wait or the messages will be lost.
+		 * Another option would be to tell the above code to
+ * ignore this breakpoint and do an auto return,
+ * but that might confuse gdb. Also this happens
+ * early enough in boot up that we don't have the traps
+ * set up yet, so...
+ */
+ breakpoint();
+ }
+ kgdb_gdb_message(s, count);
+}
+
+/*
+ * ------------------------------------------------------------
+ * Serial KGDB driver
+ * ------------------------------------------------------------
+ */
+
+static struct console kgdbcons = {
+ name:"kgdb",
+ write:kgdb_console_write,
+#ifdef CONFIG_KGDB_USER_CONSOLE
+ device:kgdb_console_device,
+#endif
+ flags:CON_PRINTBUFFER | CON_ENABLED,
+ index:-1,
+};
+
+/*
+ * The trick here is that this file gets linked before printk.o
+ * That means we get to peer at the console info in the command
+ * line before it does. If we are up, we register, otherwise,
+ * do nothing. By returning 0, we allow printk to look also.
+ */
+static int kgdb_console_enabled;
+
+int __init
+kgdb_console_init(char *str)
+{
+ if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) {
+ register_console(&kgdbcons);
+ kgdb_console_enabled = 1;
+ }
+ return 0; /* let others look at the string */
+}
+
+__setup("console=", kgdb_console_init);
+
+#ifdef CONFIG_KGDB_USER_CONSOLE
+static kdev_t kgdb_console_device(struct console *c);
+/* This stuff sort of works, but it knocks out telnet devices.
+ * We are leaving it here in case we (or you) find time to figure it out
+ * better.
+ */
+
+/*
+ * We need a real char device as well for when the console is opened for user
+ * space activities.
+ */
+
+static int
+kgdb_consdev_open(struct inode *inode, struct file *file)
+{
+ return 0;
+}
+
+static ssize_t
+kgdb_consdev_write(struct file *file, const char *buf,
+ size_t count, loff_t * ppos)
+{
+ int size, ret = 0;
+ static char kbuf[128];
+ static DECLARE_MUTEX(sem);
+
+ /* We are not reentrant... */
+ if (down_interruptible(&sem))
+ return -ERESTARTSYS;
+
+ while (count > 0) {
+ /* need to copy the data from user space */
+ size = count;
+ if (size > sizeof (kbuf))
+ size = sizeof (kbuf);
+ if (copy_from_user(kbuf, buf, size)) {
+ ret = -EFAULT;
+			break;
+ }
+ kgdb_console_write(&kgdbcons, kbuf, size);
+ count -= size;
+ ret += size;
+ buf += size;
+ }
+
+ up(&sem);
+
+ return ret;
+}
+
+struct file_operations kgdb_consdev_fops = {
+ open:kgdb_consdev_open,
+ write:kgdb_consdev_write
+};
+static kdev_t
+kgdb_console_device(struct console *c)
+{
+ return MKDEV(TTYAUX_MAJOR, 1);
+}
+
+/*
+ * This routine gets called from the serial stub in the i386/lib
+ * This is so it is done late in bring up (just before the console open).
+ */
+void
+kgdb_console_finit(void)
+{
+ if (kgdb_console_enabled) {
+ char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1));
+ char *cp = cptr;
+ while (*cptr && *cptr != '(')
+ cptr++;
+ *cptr = 0;
+ unregister_chrdev(TTYAUX_MAJOR, cp);
+ register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops);
+ }
+}
+#endif
+#endif
+#ifdef CONFIG_KGDB_TS
+#include /* time stamp code */
+#include /* in_interrupt */
+#ifdef CONFIG_KGDB_TS_64
+#define DATA_POINTS 64
+#endif
+#ifdef CONFIG_KGDB_TS_128
+#define DATA_POINTS 128
+#endif
+#ifdef CONFIG_KGDB_TS_256
+#define DATA_POINTS 256
+#endif
+#ifdef CONFIG_KGDB_TS_512
+#define DATA_POINTS 512
+#endif
+#ifdef CONFIG_KGDB_TS_1024
+#define DATA_POINTS 1024
+#endif
+#ifndef DATA_POINTS
+#define DATA_POINTS 128 /* must be a power of two */
+#endif
+#define INDEX_MASK (DATA_POINTS - 1)
+#if (INDEX_MASK & DATA_POINTS)
+#error "CONFIG_KGDB_TS_COUNT must be a power of 2"
+#endif
+struct kgdb_and_then_struct {
+#ifdef CONFIG_SMP
+ int on_cpu;
+#endif
+ struct task_struct *task;
+ long long at_time;
+ int from_ln;
+ char *in_src;
+ void *from;
+ int *with_shpf;
+ int data0;
+ int data1;
+};
+struct kgdb_and_then_struct2 {
+#ifdef CONFIG_SMP
+ int on_cpu;
+#endif
+ struct task_struct *task;
+ long long at_time;
+ int from_ln;
+ char *in_src;
+ void *from;
+ int *with_shpf;
+ struct task_struct *t1;
+ struct task_struct *t2;
+};
+struct kgdb_and_then_struct kgdb_data[DATA_POINTS];
+
+struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0];
+int kgdb_and_then_count;
+
+void
+kgdb_tstamp(int line, char *source, int data0, int data1)
+{
+ static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED;
+ int flags;
+ kgdb_local_irq_save(flags);
+ spin_lock(&ts_spin);
+ rdtscll(kgdb_and_then->at_time);
+#ifdef CONFIG_SMP
+ kgdb_and_then->on_cpu = smp_processor_id();
+#endif
+ kgdb_and_then->task = current;
+ kgdb_and_then->from_ln = line;
+ kgdb_and_then->in_src = source;
+ kgdb_and_then->from = __builtin_return_address(0);
+ kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) |
+ (preempt_count() << 8));
+ kgdb_and_then->data0 = data0;
+ kgdb_and_then->data1 = data1;
+ kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK];
+ spin_unlock(&ts_spin);
+ kgdb_local_irq_restore(flags);
+#ifdef CONFIG_PREEMPT
+
+#endif
+ return;
+}
+#endif
+typedef int gdb_debug_hook(int exceptionVector,
+ int signo, int err_code, struct pt_regs *linux_regs);
+gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception;	/* historical reasons... */
+
+static int kgdb_need_breakpoint[NR_CPUS];
+
+void kgdb_schedule_breakpoint(void)
+{
+ kgdb_need_breakpoint[smp_processor_id()] = 1;
+}
+
+void kgdb_process_breakpoint(void)
+{
+ /*
+ * Handle a breakpoint queued from inside network driver code
+ * to avoid reentrancy issues
+ */
+ if (kgdb_need_breakpoint[smp_processor_id()]) {
+ kgdb_need_breakpoint[smp_processor_id()] = 0;
+ BREAKPOINT;
+ }
+}
+
--- linux-2.6.3/arch/i386/kernel/ldt.c 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/kernel/ldt.c 2004-02-20 00:21:54.000000000 -0800
@@ -2,7 +2,7 @@
* linux/kernel/ldt.c
*
* Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
- * Copyright (C) 1999 Ingo Molnar
+ * Copyright (C) 1999, 2003 Ingo Molnar
*/
#include
@@ -18,6 +18,8 @@
#include
#include
#include
+#include
+#include
#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
static void flush_ldt(void *null)
@@ -29,34 +31,31 @@ static void flush_ldt(void *null)
static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
{
- void *oldldt;
- void *newldt;
- int oldsize;
+ int oldsize, newsize, i;
if (mincount <= pc->size)
return 0;
+ /*
+ * LDT got larger - reallocate if necessary.
+ */
oldsize = pc->size;
mincount = (mincount+511)&(~511);
- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
- newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
- else
- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
-
- if (!newldt)
- return -ENOMEM;
-
- if (oldsize)
- memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
- oldldt = pc->ldt;
- memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
- pc->ldt = newldt;
- wmb();
+ newsize = mincount*LDT_ENTRY_SIZE;
+ for (i = 0; i < newsize; i += PAGE_SIZE) {
+ int nr = i/PAGE_SIZE;
+ BUG_ON(i >= 64*1024);
+ if (!pc->ldt_pages[nr]) {
+ pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER);
+ if (!pc->ldt_pages[nr])
+ return -ENOMEM;
+ clear_highpage(pc->ldt_pages[nr]);
+ }
+ }
pc->size = mincount;
- wmb();
-
if (reload) {
#ifdef CONFIG_SMP
cpumask_t mask;
+
preempt_disable();
load_LDT(pc);
mask = cpumask_of_cpu(smp_processor_id());
@@ -67,21 +66,20 @@ static int alloc_ldt(mm_context_t *pc, i
load_LDT(pc);
#endif
}
- if (oldsize) {
- if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(oldldt);
- else
- kfree(oldldt);
- }
return 0;
}
static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
{
- int err = alloc_ldt(new, old->size, 0);
- if (err < 0)
+ int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE;
+
+ err = alloc_ldt(new, size, 0);
+ if (err < 0) {
+ new->size = 0;
return err;
- memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+ }
+ for (i = 0; i < nr_pages; i++)
+ copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0);
return 0;
}
@@ -96,6 +94,7 @@ int init_new_context(struct task_struct
init_MUTEX(&mm->context.sem);
mm->context.size = 0;
+ memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES);
old_mm = current->mm;
if (old_mm && old_mm->context.size > 0) {
down(&old_mm->context.sem);
@@ -107,23 +106,21 @@ int init_new_context(struct task_struct
/*
* No need to lock the MM as we are the last user
+ * Do not touch the ldt register, we are already
+ * in the next thread.
*/
void destroy_context(struct mm_struct *mm)
{
- if (mm->context.size) {
- if (mm == current->active_mm)
- clear_LDT();
- if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(mm->context.ldt);
- else
- kfree(mm->context.ldt);
- mm->context.size = 0;
- }
+ int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE;
+
+ for (i = 0; i < nr_pages; i++)
+ __free_page(mm->context.ldt_pages[i]);
+ mm->context.size = 0;
}
static int read_ldt(void __user * ptr, unsigned long bytecount)
{
- int err;
+ int err, i;
unsigned long size;
struct mm_struct * mm = current->mm;
@@ -138,8 +135,25 @@ static int read_ldt(void __user * ptr, u
size = bytecount;
err = 0;
- if (copy_to_user(ptr, mm->context.ldt, size))
- err = -EFAULT;
+ /*
+ * This is necessary just in case we got here straight from a
+ * context-switch where the ptes were set but no tlb flush
+	 * was done yet.  We would rather avoid doing a TLB flush in the
+ * context-switch path and do it here instead.
+ */
+ __flush_tlb_global();
+
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ int nr = i / PAGE_SIZE, bytes;
+ char *kaddr = kmap(mm->context.ldt_pages[nr]);
+
+ bytes = size - i;
+ if (bytes > PAGE_SIZE)
+ bytes = PAGE_SIZE;
+		if (copy_to_user(ptr + i, kaddr, bytes))
+ err = -EFAULT;
+ kunmap(mm->context.ldt_pages[nr]);
+ }
up(&mm->context.sem);
if (err < 0)
return err;
@@ -158,7 +172,7 @@ static int read_default_ldt(void __user
err = 0;
address = &default_ldt[0];
- size = 5*sizeof(struct desc_struct);
+ size = 5*LDT_ENTRY_SIZE;
if (size > bytecount)
size = bytecount;
@@ -200,7 +214,15 @@ static int write_ldt(void __user * ptr,
goto out_unlock;
}
- lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
+ /*
+ * No rescheduling allowed from this point to the install.
+ *
+ * We do a TLB flush for the same reason as in the read_ldt() path.
+ */
+ preempt_disable();
+ __flush_tlb_global();
+ lp = (__u32 *) ((ldt_info.entry_number << 3) +
+ (char *) __kmap_atomic_vaddr(KM_LDT_PAGE0));
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
@@ -221,6 +243,7 @@ install:
*lp = entry_1;
*(lp+1) = entry_2;
error = 0;
+ preempt_enable();
out_unlock:
up(&mm->context.sem);
@@ -248,3 +271,26 @@ asmlinkage int sys_modify_ldt(int func,
}
return ret;
}
+
+/*
+ * load one particular LDT into the current CPU
+ */
+void load_LDT_nolock(mm_context_t *pc, int cpu)
+{
+ struct page **pages = pc->ldt_pages;
+ int count = pc->size;
+ int nr_pages, i;
+
+ if (likely(!count)) {
+ pages = &default_ldt_page;
+ count = 5;
+ }
+ nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE;
+
+ for (i = 0; i < nr_pages; i++) {
+ __kunmap_atomic_type(KM_LDT_PAGE0 - i);
+ __kmap_atomic(pages[i], KM_LDT_PAGE0 - i);
+ }
+ set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count);
+ load_LDT_desc();
+}
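
The sizing arithmetic repeated through the ldt.c hunks above reduces to one
expression: an LDT of count entries occupies ceil(count * LDT_ENTRY_SIZE /
PAGE_SIZE) pages.  A hedged sketch (ldt_nr_pages() is a hypothetical helper,
not part of the patch):

	static inline int ldt_nr_pages(int count)
	{
		return (count * LDT_ENTRY_SIZE + PAGE_SIZE - 1) / PAGE_SIZE;
	}
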
--- linux-2.6.3/arch/i386/kernel/Makefile 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/i386/kernel/Makefile 2004-02-20 00:21:54.000000000 -0800
@@ -7,13 +7,14 @@ extra-y := head.o init_task.o vmlinux.ld
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
- doublefault.o
+ doublefault.o entry_trampoline.o
obj-y += cpu/
obj-y += timers/
obj-$(CONFIG_ACPI_BOOT) += acpi/
obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
obj-$(CONFIG_MCA) += mca.o
+obj-$(CONFIG_KGDB) += kgdb_stub.o
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_MICROCODE) += microcode.o
@@ -31,6 +32,7 @@ obj-y += sysenter.o vsyscall.o
obj-$(CONFIG_ACPI_SRAT) += srat.o
obj-$(CONFIG_HPET_TIMER) += time_hpet.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
EXTRA_AFLAGS := -traditional
--- linux-2.6.3/arch/i386/kernel/microcode.c 2003-10-25 14:45:44.000000000 -0700
+++ 25/arch/i386/kernel/microcode.c 2004-02-20 00:19:59.000000000 -0800
@@ -371,7 +371,9 @@ static void do_update_one (void * unused
 	spin_lock_irqsave(&microcode_update_lock, flags);
/* write microcode via MSR 0x79 */
- wrmsr(MSR_IA32_UCODE_WRITE, (unsigned int)(uci->mc->bits), 0);
+ wrmsr(MSR_IA32_UCODE_WRITE,
+ (unsigned long) uci->mc->bits,
+ (unsigned long) uci->mc->bits >> 16 >> 16);
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
__asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
@@ -507,3 +509,4 @@ static void __exit microcode_exit (void)
module_init(microcode_init)
module_exit(microcode_exit)
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
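
The two-step shift in the wrmsr() hunk above is the usual way to obtain the
high half of a value whose type may be only 32 bits wide: shifting a 32-bit
unsigned long right by 32 in one operation is undefined, while ">> 16 >> 16"
yields 0 on 32-bit builds and the true upper half on 64-bit ones.  A sketch of
the idiom (hi32() is a hypothetical helper, not part of the patch):

	static inline unsigned long hi32(unsigned long v)
	{
		/* ">> 32" would be undefined when unsigned long is 32 bits wide */
		return v >> 16 >> 16;
	}
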
--- linux-2.6.3/arch/i386/kernel/mpparse.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/kernel/mpparse.c 2004-02-20 00:21:54.000000000 -0800
@@ -668,7 +668,7 @@ void __init get_smp_config (void)
* Read the physical hardware table. Anything here will
* override the defaults.
*/
- if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
+ if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) {
smp_found_config = 0;
printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
--- linux-2.6.3/arch/i386/kernel/nmi.c 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/kernel/nmi.c 2004-02-20 00:20:41.000000000 -0800
@@ -31,7 +31,16 @@
#include
#include
+#ifdef CONFIG_KGDB
+#include
+#ifdef CONFIG_SMP
+unsigned int nmi_watchdog = NMI_IO_APIC;
+#else
+unsigned int nmi_watchdog = NMI_LOCAL_APIC;
+#endif
+#else
unsigned int nmi_watchdog = NMI_NONE;
+#endif
static unsigned int nmi_hz = HZ;
unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
extern void show_registers(struct pt_regs *regs);
@@ -42,7 +51,7 @@ extern void show_registers(struct pt_reg
* be enabled
* -1: the lapic NMI watchdog is disabled, but can be enabled
*/
-static int nmi_active;
+int nmi_active;
#define K7_EVNTSEL_ENABLE (1 << 22)
#define K7_EVNTSEL_INT (1 << 20)
@@ -248,7 +257,7 @@ static int __init init_lapic_nmi_sysfs(v
error = sysdev_class_register(&nmi_sysclass);
if (!error)
- error = sys_device_register(&device_lapic_nmi);
+ error = sysdev_register(&device_lapic_nmi);
return error;
}
/* must come after the local APIC's device_initcall() */
@@ -408,6 +417,9 @@ void touch_nmi_watchdog (void)
for (i = 0; i < NR_CPUS; i++)
alert_counter[i] = 0;
}
+#ifdef CONFIG_KGDB
+int tune_watchdog = 5*HZ;
+#endif
void nmi_watchdog_tick (struct pt_regs * regs)
{
@@ -421,12 +433,24 @@ void nmi_watchdog_tick (struct pt_regs *
sum = irq_stat[cpu].apic_timer_irqs;
+#ifdef CONFIG_KGDB
+ if (! in_kgdb(regs) && last_irq_sums[cpu] == sum ) {
+
+#else
if (last_irq_sums[cpu] == sum) {
+#endif
/*
* Ayiee, looks like this CPU is stuck ...
* wait a few IRQs (5 seconds) before doing the oops ...
*/
alert_counter[cpu]++;
+#ifdef CONFIG_KGDB
+ if (alert_counter[cpu] == tune_watchdog) {
+ kgdb_handle_exception(2, SIGPWR, 0, regs);
+ last_irq_sums[cpu] = sum;
+ alert_counter[cpu] = 0;
+ }
+#endif
if (alert_counter[cpu] == 5*nmi_hz) {
spin_lock(&nmi_print_lock);
/*
@@ -462,6 +486,7 @@ void nmi_watchdog_tick (struct pt_regs *
}
}
+EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(disable_lapic_nmi_watchdog);
EXPORT_SYMBOL(enable_lapic_nmi_watchdog);
--- linux-2.6.3/arch/i386/kernel/process.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/kernel/process.c 2004-02-20 00:21:54.000000000 -0800
@@ -47,6 +47,7 @@
#include
#include
#include
+#include
#ifdef CONFIG_MATH_EMULATION
#include
#endif
@@ -304,6 +305,9 @@ void flush_thread(void)
struct task_struct *tsk = current;
memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+#ifdef CONFIG_X86_HIGH_ENTRY
+ clear_thread_flag(TIF_DB7);
+#endif
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -317,9 +321,8 @@ void release_thread(struct task_struct *
if (dead_task->mm) {
// temporary debugging check
if (dead_task->mm->context.size) {
- printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
+ printk("WARNING: dead process %8s still has LDT? <%d>\n",
dead_task->comm,
- dead_task->mm->context.ldt,
dead_task->mm->context.size);
BUG();
}
@@ -354,7 +357,17 @@ int copy_thread(int nr, unsigned long cl
p->thread.esp = (unsigned long) childregs;
p->thread.esp0 = (unsigned long) (childregs+1);
+ /*
+ * get the two stack pages, for the virtual stack.
+ *
+ * IMPORTANT: this code relies on the fact that the task
+ * structure is an 8K aligned piece of physical memory.
+ */
+ p->thread.stack_page0 = virt_to_page((unsigned long)p->thread_info);
+ p->thread.stack_page1 = virt_to_page((unsigned long)p->thread_info + PAGE_SIZE);
+
p->thread.eip = (unsigned long) ret_from_fork;
+ p->thread_info->real_stack = p->thread_info;
savesegment(fs,p->thread.fs);
savesegment(gs,p->thread.gs);
@@ -506,10 +519,41 @@ struct task_struct * __switch_to(struct
__unlazy_fpu(prev_p);
+#ifdef CONFIG_X86_HIGH_ENTRY
+ /*
+ * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is
+ * needed because otherwise NMIs could interrupt the
+ * user-return code with a virtual stack and stale TLBs.)
+ */
+ __kunmap_atomic_type(KM_VSTACK0);
+ __kunmap_atomic_type(KM_VSTACK1);
+ __kmap_atomic(next->stack_page0, KM_VSTACK0);
+ __kmap_atomic(next->stack_page1, KM_VSTACK1);
+
+ /*
+ * NOTE: here we rely on the task being the stack as well
+ */
+ next_p->thread_info->virtual_stack =
+ (void *)__kmap_atomic_vaddr(KM_VSTACK0);
+
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+ /*
+ * If next was preempted on entry from userspace to kernel,
+ * and now it's on a different cpu, we need to adjust %esp.
+ * This assumes that entry.S does not copy %esp while on the
+ * virtual stack (with interrupts enabled): which is so,
+ * except within __SWITCH_KERNELSPACE itself.
+ */
+ if (unlikely(next->esp >= TASK_SIZE)) {
+ next->esp &= THREAD_SIZE - 1;
+ next->esp |= (unsigned long) next_p->thread_info->virtual_stack;
+ }
+#endif
+#endif
/*
* Reload esp0, LDT and the page table pointer:
*/
- load_esp0(tss, next);
+ load_virtual_esp0(tss, next_p);
/*
* Load the per-thread Thread-Local Storage descriptor.
@@ -637,6 +681,8 @@ extern void scheduling_functions_start_h
extern void scheduling_functions_end_here(void);
#define first_sched ((unsigned long) scheduling_functions_start_here)
#define last_sched ((unsigned long) scheduling_functions_end_here)
+#define top_esp (THREAD_SIZE - sizeof(unsigned long))
+#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long))
unsigned long get_wchan(struct task_struct *p)
{
@@ -647,12 +693,12 @@ unsigned long get_wchan(struct task_stru
return 0;
stack_page = (unsigned long)p->thread_info;
esp = p->thread.esp;
- if (!stack_page || esp < stack_page || esp > 8188+stack_page)
+ if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
return 0;
/* include/asm-i386/system.h:switch_to() pushes ebp last. */
ebp = *(unsigned long *) esp;
do {
- if (ebp < stack_page || ebp > 8184+stack_page)
+ if (ebp < stack_page || ebp > top_ebp+stack_page)
return 0;
eip = *(unsigned long *) (ebp+4);
if (eip < first_sched || eip >= last_sched)
--- linux-2.6.3/arch/i386/kernel/reboot.c 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/i386/kernel/reboot.c 2004-02-20 00:21:54.000000000 -0800
@@ -155,12 +155,11 @@ void machine_real_restart(unsigned char
CMOS_WRITE(0x00, 0x8f);
spin_unlock_irqrestore(&rtc_lock, flags);
- /* Remap the kernel at virtual address zero, as well as offset zero
- from the kernel segment. This assumes the kernel segment starts at
- virtual address PAGE_OFFSET. */
-
- memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
- sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
+ /*
+ * Remap the first 16 MB of RAM (which includes the kernel image)
+ * at virtual address zero:
+ */
+ setup_identity_mappings(swapper_pg_dir, 0, 16*1024*1024);
/*
* Use `swapper_pg_dir' as our page directory.
--- linux-2.6.3/arch/i386/kernel/setup.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/kernel/setup.c 2004-02-20 00:19:59.000000000 -0800
@@ -1118,6 +1118,19 @@ void __init setup_arch(char **cmdline_p)
#endif
paging_init();
+#ifdef CONFIG_EARLY_PRINTK
+ {
+ char *s = strstr(*cmdline_p, "earlyprintk=");
+ if (s) {
+ extern void setup_early_printk(char *);
+
+ setup_early_printk(s);
+ printk("early console enabled\n");
+ }
+ }
+#endif
+
+
dmi_scan_machine();
#ifdef CONFIG_X86_GENERICARCH
--- linux-2.6.3/arch/i386/kernel/signal.c 2003-11-23 19:03:00.000000000 -0800
+++ 25/arch/i386/kernel/signal.c 2004-02-20 00:21:54.000000000 -0800
@@ -128,28 +128,29 @@ sys_sigaltstack(const stack_t __user *us
*/
static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax)
+restore_sigcontext(struct pt_regs *regs,
+ struct sigcontext __user *__sc, int *peax)
{
- unsigned int err = 0;
+ struct sigcontext scratch; /* 88 bytes of scratch area */
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
-#define COPY(x) err |= __get_user(regs->x, &sc->x)
+ if (copy_from_user(&scratch, __sc, sizeof(scratch)))
+ return -EFAULT;
+
+#define COPY(x) regs->x = scratch.x
#define COPY_SEG(seg) \
- { unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
+ { unsigned short tmp = scratch.seg; \
regs->x##seg = tmp; }
#define COPY_SEG_STRICT(seg) \
- { unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
+ { unsigned short tmp = scratch.seg; \
regs->x##seg = tmp|3; }
#define GET_SEG(seg) \
- { unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
+ { unsigned short tmp = scratch.seg; \
loadsegment(seg,tmp); }
GET_SEG(gs);
@@ -168,27 +169,23 @@ restore_sigcontext(struct pt_regs *regs,
COPY_SEG_STRICT(ss);
{
- unsigned int tmpflags;
- err |= __get_user(tmpflags, &sc->eflags);
+ unsigned int tmpflags = scratch.eflags;
regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
regs->orig_eax = -1; /* disable syscall checks */
}
{
- struct _fpstate __user * buf;
- err |= __get_user(buf, &sc->fpstate);
+ struct _fpstate * buf = scratch.fpstate;
if (buf) {
if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
- goto badframe;
- err |= restore_i387(buf);
+ return -EFAULT;
+ if (restore_i387(buf))
+ return -EFAULT;
}
}
- err |= __get_user(*peax, &sc->eax);
- return err;
-
-badframe:
- return 1;
+ *peax = scratch.eax;
+ return 0;
}
asmlinkage int sys_sigreturn(unsigned long __unused)
@@ -266,46 +263,47 @@ badframe:
*/
static int
-setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
+setup_sigcontext(struct sigcontext __user *__sc, struct _fpstate __user *fpstate,
struct pt_regs *regs, unsigned long mask)
{
- int tmp, err = 0;
+ struct sigcontext sc; /* 88 bytes of scratch area */
+ int tmp;
tmp = 0;
__asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
- err |= __put_user(tmp, (unsigned int *)&sc->gs);
+ *(unsigned int *)&sc.gs = tmp;
__asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
- err |= __put_user(tmp, (unsigned int *)&sc->fs);
-
- err |= __put_user(regs->xes, (unsigned int *)&sc->es);
- err |= __put_user(regs->xds, (unsigned int *)&sc->ds);
- err |= __put_user(regs->edi, &sc->edi);
- err |= __put_user(regs->esi, &sc->esi);
- err |= __put_user(regs->ebp, &sc->ebp);
- err |= __put_user(regs->esp, &sc->esp);
- err |= __put_user(regs->ebx, &sc->ebx);
- err |= __put_user(regs->edx, &sc->edx);
- err |= __put_user(regs->ecx, &sc->ecx);
- err |= __put_user(regs->eax, &sc->eax);
- err |= __put_user(current->thread.trap_no, &sc->trapno);
- err |= __put_user(current->thread.error_code, &sc->err);
- err |= __put_user(regs->eip, &sc->eip);
- err |= __put_user(regs->xcs, (unsigned int *)&sc->cs);
- err |= __put_user(regs->eflags, &sc->eflags);
- err |= __put_user(regs->esp, &sc->esp_at_signal);
- err |= __put_user(regs->xss, (unsigned int *)&sc->ss);
+ *(unsigned int *)&sc.fs = tmp;
+ *(unsigned int *)&sc.es = regs->xes;
+ *(unsigned int *)&sc.ds = regs->xds;
+ sc.edi = regs->edi;
+ sc.esi = regs->esi;
+ sc.ebp = regs->ebp;
+ sc.esp = regs->esp;
+ sc.ebx = regs->ebx;
+ sc.edx = regs->edx;
+ sc.ecx = regs->ecx;
+ sc.eax = regs->eax;
+ sc.trapno = current->thread.trap_no;
+ sc.err = current->thread.error_code;
+ sc.eip = regs->eip;
+ *(unsigned int *)&sc.cs = regs->xcs;
+ sc.eflags = regs->eflags;
+ sc.esp_at_signal = regs->esp;
+ *(unsigned int *)&sc.ss = regs->xss;
tmp = save_i387(fpstate);
if (tmp < 0)
- err = 1;
- else
- err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+ return 1;
+ sc.fpstate = tmp ? fpstate : NULL;
/* non-iBCS2 extensions.. */
- err |= __put_user(mask, &sc->oldmask);
- err |= __put_user(current->thread.cr2, &sc->cr2);
+ sc.oldmask = mask;
+ sc.cr2 = current->thread.cr2;
- return err;
+ if (copy_to_user(__sc, &sc, sizeof(sc)))
+ return 1;
+ return 0;
}
/*
@@ -443,7 +441,7 @@ static void setup_rt_frame(int sig, stru
/* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ err |= __put_user(current->sas_ss_sp, (unsigned long *)&frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->esp),
&frame->uc.uc_stack.ss_flags);
err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
--- linux-2.6.3/arch/i386/kernel/smpboot.c 2004-02-17 20:48:42.000000000 -0800
+++ 25/arch/i386/kernel/smpboot.c 2004-02-20 00:20:55.000000000 -0800
@@ -39,6 +39,7 @@
#include
#include
+#include
#include
#include
#include
@@ -934,7 +935,7 @@ static int boot_cpu_logical_apicid;
/* Where the IO area was mapped on multiquad, always 0 otherwise */
void *xquad_portio;
-int cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
static void __init smp_boot_cpus(unsigned int max_cpus)
{
@@ -948,10 +949,13 @@ static void __init smp_boot_cpus(unsigne
printk("CPU%d: ", 0);
print_cpu_info(&cpu_data[0]);
+ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
boot_cpu_logical_apicid = logical_smp_processor_id();
current_thread_info()->cpu = 0;
smp_tune_scheduling();
+ cpus_clear(cpu_sibling_map[0]);
+ cpu_set(0, cpu_sibling_map[0]);
/*
* If we couldn't find an SMP configuration at boot time,
@@ -1008,8 +1012,6 @@ static void __init smp_boot_cpus(unsigne
setup_local_APIC();
map_cpu_to_logical_apicid();
- if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
- BUG();
setup_portio_remap();
@@ -1080,32 +1082,34 @@ static void __init smp_boot_cpus(unsigne
Dprintk("Boot done.\n");
/*
- * If Hyper-Threading is avaialble, construct cpu_sibling_map[], so
- * that we can tell the sibling CPU efficiently.
+ * construct cpu_sibling_map[], so that we can tell sibling CPUs
+ * efficiently.
*/
- if (cpu_has_ht && smp_num_siblings > 1) {
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- cpu_sibling_map[cpu] = NO_PROC_ID;
-
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- int i;
- if (!cpu_isset(cpu, cpu_callout_map))
- continue;
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ cpus_clear(cpu_sibling_map[cpu]);
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ int siblings = 0;
+ int i;
+ if (!cpu_isset(cpu, cpu_callout_map))
+ continue;
+ if (smp_num_siblings > 1) {
for (i = 0; i < NR_CPUS; i++) {
- if (i == cpu || !cpu_isset(i, cpu_callout_map))
+ if (!cpu_isset(i, cpu_callout_map))
continue;
if (phys_proc_id[cpu] == phys_proc_id[i]) {
- cpu_sibling_map[cpu] = i;
- printk("cpu_sibling_map[%d] = %d\n", cpu, cpu_sibling_map[cpu]);
- break;
+ siblings++;
+ cpu_set(i, cpu_sibling_map[cpu]);
}
}
- if (cpu_sibling_map[cpu] == NO_PROC_ID) {
- smp_num_siblings = 1;
- printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu);
- }
+ } else {
+ siblings++;
+ cpu_set(cpu, cpu_sibling_map[cpu]);
}
+
+ if (siblings != smp_num_siblings)
+ printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
}
smpboot_setup_io_apic();
@@ -1119,6 +1123,216 @@ static void __init smp_boot_cpus(unsigne
synchronize_tsc_bp();
}
+#ifdef CONFIG_SCHED_SMT
+#ifdef CONFIG_NUMA
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static struct sched_group sched_group_nodes[MAX_NUMNODES];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+__init void arch_init_sched_domains(void)
+{
+ int i;
+ struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+ /* Set up domains */
+ for_each_cpu(i) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+ struct sched_domain *node_domain = &per_cpu(node_domains, i);
+ int node = cpu_to_node(i);
+ cpumask_t nodemask = node_to_cpumask(node);
+
+ *cpu_domain = SD_SIBLING_INIT;
+ cpu_domain->span = cpu_sibling_map[i];
+
+ *phys_domain = SD_CPU_INIT;
+ phys_domain->span = nodemask;
+
+ *node_domain = SD_NODE_INIT;
+ node_domain->span = cpu_possible_map;
+ }
+
+ /* Set up CPU (sibling) groups */
+ for_each_cpu(i) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ int j;
+ first_cpu = last_cpu = NULL;
+
+ if (i != first_cpu(cpu_domain->span))
+ continue;
+
+ for_each_cpu_mask(j, cpu_domain->span) {
+ struct sched_group *cpu = &sched_group_cpus[j];
+
+ cpu->cpumask = CPU_MASK_NONE;
+ cpu_set(j, cpu->cpumask);
+ cpu->cpu_power = SCHED_LOAD_SCALE;
+
+ if (!first_cpu)
+ first_cpu = cpu;
+ if (last_cpu)
+ last_cpu->next = cpu;
+ last_cpu = cpu;
+ }
+ last_cpu->next = first_cpu;
+ }
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ int j;
+ cpumask_t nodemask;
+ struct sched_group *node = &sched_group_nodes[i];
+ cpus_and(nodemask, node_to_cpumask(i), cpu_possible_map);
+
+ if (cpus_empty(nodemask))
+ continue;
+
+ first_cpu = last_cpu = NULL;
+ /* Set up physical groups */
+ for_each_cpu_mask(j, nodemask) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(j);
+ struct sched_group *cpu = &sched_group_phys[j];
+
+ if (j != first_cpu(cpu_domain->span))
+ continue;
+
+ cpu->cpumask = cpu_domain->span;
+ /*
+ * Make each extra sibling increase power by 10% of
+ * the basic CPU. This is very arbitrary.
+ */
+ cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+ node->cpu_power += cpu->cpu_power;
+
+ if (!first_cpu)
+ first_cpu = cpu;
+ if (last_cpu)
+ last_cpu->next = cpu;
+ last_cpu = cpu;
+ }
+ last_cpu->next = first_cpu;
+ }
+
+ /* Set up nodes */
+ first_cpu = last_cpu = NULL;
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ struct sched_group *cpu = &sched_group_nodes[i];
+ cpumask_t nodemask;
+ cpus_and(nodemask, node_to_cpumask(i), cpu_possible_map);
+
+ if (cpus_empty(nodemask))
+ continue;
+
+ cpu->cpumask = nodemask;
+ /* ->cpu_power already setup */
+
+ if (!first_cpu)
+ first_cpu = cpu;
+ if (last_cpu)
+ last_cpu->next = cpu;
+ last_cpu = cpu;
+ }
+ last_cpu->next = first_cpu;
+
+ mb();
+ for_each_cpu(i) {
+ int node = cpu_to_node(i);
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+ struct sched_domain *node_domain = &per_cpu(node_domains, i);
+ struct sched_group *cpu_group = &sched_group_cpus[i];
+ struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
+ struct sched_group *node_group = &sched_group_nodes[node];
+
+ cpu_domain->parent = phys_domain;
+ phys_domain->parent = node_domain;
+
+ node_domain->groups = node_group;
+ phys_domain->groups = phys_group;
+ cpu_domain->groups = cpu_group;
+ }
+}
+#else /* CONFIG_NUMA */
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+__init void arch_init_sched_domains(void)
+{
+ int i;
+ struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+ /* Set up domains */
+ for_each_cpu(i) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+
+ *cpu_domain = SD_SIBLING_INIT;
+ cpu_domain->span = cpu_sibling_map[i];
+
+ *phys_domain = SD_CPU_INIT;
+ phys_domain->span = cpu_possible_map;
+ }
+
+ /* Set up CPU (sibling) groups */
+ for_each_cpu(i) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ int j;
+ first_cpu = last_cpu = NULL;
+
+ if (i != first_cpu(cpu_domain->span))
+ continue;
+
+ for_each_cpu_mask(j, cpu_domain->span) {
+ struct sched_group *cpu = &sched_group_cpus[j];
+
+ cpus_clear(cpu->cpumask);
+ cpu_set(j, cpu->cpumask);
+ cpu->cpu_power = SCHED_LOAD_SCALE;
+
+ if (!first_cpu)
+ first_cpu = cpu;
+ if (last_cpu)
+ last_cpu->next = cpu;
+ last_cpu = cpu;
+ }
+ last_cpu->next = first_cpu;
+ }
+
+ first_cpu = last_cpu = NULL;
+ /* Set up physical groups */
+ for_each_cpu(i) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ struct sched_group *cpu = &sched_group_phys[i];
+
+ if (i != first_cpu(cpu_domain->span))
+ continue;
+
+ cpu->cpumask = cpu_domain->span;
+ /* See SMT+NUMA setup for comment */
+ cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+
+ if (!first_cpu)
+ first_cpu = cpu;
+ if (last_cpu)
+ last_cpu->next = cpu;
+ last_cpu = cpu;
+ }
+ last_cpu->next = first_cpu;
+
+ mb();
+ for_each_cpu(i) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+ struct sched_group *cpu_group = &sched_group_cpus[i];
+ struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
+ cpu_domain->parent = phys_domain;
+ phys_domain->groups = phys_group;
+ cpu_domain->groups = cpu_group;
+ }
+}
+#endif /* CONFIG_NUMA */
+#endif /* CONFIG_SCHED_SMT */
+
/* These are wrappers to interface to the new boot process. Someone
who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init smp_prepare_cpus(unsigned int max_cpus)
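For readers tracing the sched-domain setup added above: within each domain's span the per-CPU sched_group structures are chained into a circular singly linked list, with the last group pointing back to the first, so a balancer can walk the ring starting from any group. A standalone user-space sketch of that linking pattern (illustrative names only, not kernel code):

/* Sketch of the circular group list built by arch_init_sched_domains(). */
#include <stdio.h>

#define NR_CPUS 8

struct group {
	int cpu;
	struct group *next;
};

static struct group groups[NR_CPUS];

/* Link the groups of the CPUs present in 'span' (a bitmask) into a ring. */
static struct group *link_groups(unsigned long span)
{
	struct group *first = NULL, *last = NULL;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!(span & (1UL << cpu)))
			continue;
		groups[cpu].cpu = cpu;
		if (!first)
			first = &groups[cpu];
		if (last)
			last->next = &groups[cpu];
		last = &groups[cpu];
	}
	if (last)
		last->next = first;		/* close the ring */
	return first;
}

int main(void)
{
	struct group *first = link_groups(0x36);	/* CPUs 1, 2, 4, 5 */
	struct group *g = first;

	do {
		printf("cpu%d -> ", g->cpu);
		g = g->next;
	} while (g != first);
	printf("back to cpu%d\n", first->cpu);
	return 0;
}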
--- linux-2.6.3/arch/i386/kernel/smp.c 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/kernel/smp.c 2004-02-20 00:21:54.000000000 -0800
@@ -327,10 +327,12 @@ asmlinkage void smp_invalidate_interrupt
if (flush_mm == cpu_tlbstate[cpu].active_mm) {
if (cpu_tlbstate[cpu].state == TLBSTATE_OK) {
+#ifndef CONFIG_X86_SWITCH_PAGETABLES
if (flush_va == FLUSH_ALL)
local_flush_tlb();
else
__flush_tlb_one(flush_va);
+#endif
} else
leave_mm(cpu);
}
@@ -396,21 +398,6 @@ static void flush_tlb_others(cpumask_t c
spin_unlock(&tlbstate_lock);
}
-void flush_tlb_current_task(void)
-{
- struct mm_struct *mm = current->mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- local_flush_tlb();
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
- preempt_enable();
-}
-
void flush_tlb_mm (struct mm_struct * mm)
{
cpumask_t cpu_mask;
@@ -442,7 +429,10 @@ void flush_tlb_page(struct vm_area_struc
if (current->active_mm == mm) {
if(current->mm)
- __flush_tlb_one(va);
+#ifndef CONFIG_X86_SWITCH_PAGETABLES
+ __flush_tlb_one(va)
+#endif
+ ;
else
leave_mm(smp_processor_id());
}
@@ -466,7 +456,17 @@ void flush_tlb_all(void)
{
on_each_cpu(do_flush_tlb_all, 0, 1, 1);
}
-
+#ifdef CONFIG_KGDB
+/*
+ * By using the NMI code instead of a vector we just sneak through the
+ * word generator, coming out with just what we want.  And it does
+ * not matter whether clustered_apic_mode is set or not.
+ */
+void smp_send_nmi_allbutself(void)
+{
+ send_IPI_allbutself(APIC_DM_NMI);
+}
+#endif
/*
* this function sends a 'reschedule' IPI to another CPU.
* it goes straight through and wastes no time serializing
--- linux-2.6.3/arch/i386/kernel/sysenter.c 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/kernel/sysenter.c 2004-02-20 00:21:54.000000000 -0800
@@ -18,13 +18,18 @@
#include
#include
#include
+#include
extern asmlinkage void sysenter_entry(void);
void enable_sep_cpu(void *info)
{
int cpu = get_cpu();
+#ifdef CONFIG_X86_HIGH_ENTRY
+ struct tss_struct *tss = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu;
+#else
struct tss_struct *tss = init_tss + cpu;
+#endif
tss->ss1 = __KERNEL_CS;
tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
--- linux-2.6.3/arch/i386/kernel/sys_i386.c 2003-06-14 12:18:35.000000000 -0700
+++ 25/arch/i386/kernel/sys_i386.c 2004-02-20 00:21:22.000000000 -0800
@@ -15,6 +15,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -106,8 +107,6 @@ out:
}
-extern asmlinkage int sys_select(int, fd_set __user *, fd_set __user *, fd_set __user *, struct timeval __user *);
-
struct sel_arg_struct {
unsigned long n;
fd_set __user *inp, *outp, *exp;
--- linux-2.6.3/arch/i386/kernel/time.c 2004-02-03 20:42:34.000000000 -0800
+++ 25/arch/i386/kernel/time.c 2004-02-20 00:20:36.000000000 -0800
@@ -346,7 +346,7 @@ static int time_init_device(void)
{
int error = sysdev_class_register(&pit_sysclass);
if (!error)
- error = sys_device_register(&device_i8253);
+ error = sysdev_register(&device_i8253);
return error;
}
--- linux-2.6.3/arch/i386/kernel/timers/common.c 2003-09-27 18:57:43.000000000 -0700
+++ 25/arch/i386/kernel/timers/common.c 2004-02-20 00:19:59.000000000 -0800
@@ -137,3 +137,23 @@ bad_calibration:
}
#endif
+/* calculate cpu_khz */
+void __init init_cpu_khz(void)
+{
+ if (cpu_has_tsc) {
+ unsigned long tsc_quotient = calibrate_tsc();
+ if (tsc_quotient) {
+ /* report CPU clock rate in Hz.
+ * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
+ * clock/second. Our precision is about 100 ppm.
+ */
+ { unsigned long eax=0, edx=1000;
+ __asm__("divl %2"
+ :"=a" (cpu_khz), "=d" (edx)
+ :"r" (tsc_quotient),
+ "0" (eax), "1" (edx));
+ printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
+ }
+ }
+ }
+}
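The divl above computes cpu_khz as the 64-bit quotient (1000 * 2^32) / tsc_quotient, where tsc_quotient (from calibrate_tsc()) appears, from the comment, to be 2^32 divided by the TSC rate in ticks per microsecond. A hedged, portable restatement of that arithmetic in user-space C (names are illustrative):

/* Portable sketch of the cpu_khz computation performed by the inline divl. */
#include <stdio.h>
#include <stdint.h>

static unsigned long compute_cpu_khz(uint32_t tsc_quotient)
{
	/* edx:eax = 1000:0 divided by tsc_quotient, i.e. (1000 << 32) / q */
	return (unsigned long)((1000ULL << 32) / tsc_quotient);
}

int main(void)
{
	/* Example: a 2.4 GHz CPU runs 2400 TSC ticks per microsecond,
	 * so tsc_quotient would be roughly 2^32 / 2400. */
	uint32_t tsc_quotient = (uint32_t)((1ULL << 32) / 2400);
	unsigned long cpu_khz = compute_cpu_khz(tsc_quotient);

	printf("Detected %lu.%03lu MHz processor.\n",
	       cpu_khz / 1000, cpu_khz % 1000);
	return 0;
}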
--- linux-2.6.3/arch/i386/kernel/timers/Makefile 2003-09-27 18:57:43.000000000 -0700
+++ 25/arch/i386/kernel/timers/Makefile 2004-02-20 00:19:59.000000000 -0800
@@ -6,3 +6,4 @@ obj-y := timer.o timer_none.o timer_tsc.
obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o
obj-$(CONFIG_HPET_TIMER) += timer_hpet.o
+obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o
--- linux-2.6.3/arch/i386/kernel/timers/timer.c 2003-09-08 13:58:55.000000000 -0700
+++ 25/arch/i386/kernel/timers/timer.c 2004-02-20 00:19:59.000000000 -0800
@@ -19,6 +19,9 @@ static struct timer_opts* timers[] = {
#ifdef CONFIG_HPET_TIMER
&timer_hpet,
#endif
+#ifdef CONFIG_X86_PM_TIMER
+ &timer_pmtmr,
+#endif
&timer_tsc,
&timer_pit,
NULL,
--- linux-2.6.3/arch/i386/kernel/timers/timer_cyclone.c 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/i386/kernel/timers/timer_cyclone.c 2004-02-20 00:19:59.000000000 -0800
@@ -212,26 +212,7 @@ static int __init init_cyclone(char* ove
}
}
- /* init cpu_khz.
- * XXX - This should really be done elsewhere,
- * and in a more generic fashion. -johnstul@us.ibm.com
- */
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc();
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
- }
- }
- }
+ init_cpu_khz();
/* Everything looks good! */
return 0;
--- /dev/null 2002-08-30 16:31:37.000000000 -0700
+++ 25/arch/i386/kernel/timers/timer_pm.c 2004-02-20 00:19:59.000000000 -0800
@@ -0,0 +1,217 @@
+/*
+ * (C) Dominik Brodowski 2003
+ *
+ * Driver to use the Power Management Timer (PMTMR) available in some
+ * southbridges as primary timing source for the Linux kernel.
+ *
+ * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
+ * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
+ *
+ * This file is licensed under the GPL v2.
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+/* The I/O port the PMTMR resides at.
+ * The location is detected during setup_arch(),
+ * in arch/i386/acpi/boot.c */
+u32 pmtmr_ioport = 0;
+
+
+/* value of the Power timer at last timer interrupt */
+static u32 offset_tick;
+static u32 offset_delay;
+
+static unsigned long long monotonic_base;
+static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
+
+#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
+
+/*helper function to safely read acpi pm timesource*/
+static inline u32 read_pmtmr(void)
+{
+ u32 v1=0,v2=0,v3=0;
+ /* It has been reported that on various broken
+ * chipsets (ICH4, PIIX4 and PIIX4E) the ACPI PM clock
+ * source is not latched, so you must read it multiple
+ * times to ensure a safe value is read.
+ */
+ do {
+ v1 = inl(pmtmr_ioport);
+ v2 = inl(pmtmr_ioport);
+ v3 = inl(pmtmr_ioport);
+ } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
+ || (v3 > v1 && v3 < v2));
+
+ /* mask the output to 24 bits */
+ return v2 & ACPI_PM_MASK;
+}
+
+static int init_pmtmr(char* override)
+{
+ u32 value1, value2;
+ unsigned int i;
+
+ if (override[0] && strncmp(override,"pmtmr",5))
+ return -ENODEV;
+
+ if (!pmtmr_ioport)
+ return -ENODEV;
+
+ /* we use the TSC for delay_pmtmr, so make sure it exists */
+ if (!cpu_has_tsc)
+ return -ENODEV;
+
+ /* "verify" this timing source */
+ value1 = read_pmtmr();
+ for (i = 0; i < 10000; i++) {
+ value2 = read_pmtmr();
+ if (value2 == value1)
+ continue;
+ if (value2 > value1)
+ goto pm_good;
+ if ((value2 < value1) && ((value2) < 0xFFF))
+ goto pm_good;
+ printk(KERN_INFO "PM-Timer had inconsistent results: 0x%x, 0x%x - aborting.\n", value1, value2);
+ return -EINVAL;
+ }
+ printk(KERN_INFO "PM-Timer had no reasonable result: 0x%x - aborting.\n", value1);
+ return -ENODEV;
+
+pm_good:
+ init_cpu_khz();
+ return 0;
+}
+
+static inline u32 cyc2us(u32 cycles)
+{
+ /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
+ * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
+ *
+ * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
+ * easily be multiplied by 286 (=0x11E) without having to fear
+ * u32 overflows.
+ */
+ cycles *= 286;
+ return (cycles >> 10);
+}
+
+/*
+ * this gets called during each timer interrupt
+ * - Called while holding the writer xtime_lock
+ */
+static void mark_offset_pmtmr(void)
+{
+ u32 lost, delta, last_offset;
+ static int first_run = 1;
+ last_offset = offset_tick;
+
+ write_seqlock(&monotonic_lock);
+
+ offset_tick = read_pmtmr();
+
+ /* calculate tick interval */
+ delta = (offset_tick - last_offset) & ACPI_PM_MASK;
+
+ /* convert to usecs */
+ delta = cyc2us(delta);
+
+ /* update the monotonic base value */
+ monotonic_base += delta * NSEC_PER_USEC;
+ write_sequnlock(&monotonic_lock);
+
+ /* convert to ticks */
+ delta += offset_delay;
+ lost = delta / (USEC_PER_SEC / HZ);
+ offset_delay = delta % (USEC_PER_SEC / HZ);
+
+
+ /* compensate for lost ticks */
+ if (lost >= 2)
+ jiffies_64 += lost - 1;
+
+ /* don't calculate delay for first run,
+ or if we've got less than a tick */
+ if (first_run || (lost < 1)) {
+ first_run = 0;
+ offset_delay = 0;
+ }
+}
+
+
+static unsigned long long monotonic_clock_pmtmr(void)
+{
+ u32 last_offset, this_offset;
+ unsigned long long base, ret;
+ unsigned seq;
+
+
+ /* atomically read monotonic base & last_offset */
+ do {
+ seq = read_seqbegin(&monotonic_lock);
+ last_offset = offset_tick;
+ base = monotonic_base;
+ } while (read_seqretry(&monotonic_lock, seq));
+
+ /* Read the pmtmr */
+ this_offset = read_pmtmr();
+
+ /* convert to nanoseconds */
+ ret = (this_offset - last_offset) & ACPI_PM_MASK;
+ ret = base + (cyc2us(ret) * NSEC_PER_USEC);
+ return ret;
+}
+
+static void delay_pmtmr(unsigned long loops)
+{
+ unsigned long bclock, now;
+
+ rdtscl(bclock);
+ do
+ {
+ rep_nop();
+ rdtscl(now);
+ } while ((now-bclock) < loops);
+}
+
+
+/*
+ * get the offset (in microseconds) from the last call to mark_offset()
+ * - Called holding a reader xtime_lock
+ */
+static unsigned long get_offset_pmtmr(void)
+{
+ u32 now, offset, delta = 0;
+
+ offset = offset_tick;
+ now = read_pmtmr();
+ delta = (now - offset)&ACPI_PM_MASK;
+
+ return (unsigned long) offset_delay + cyc2us(delta);
+}
+
+
+/* acpi timer_opts struct */
+struct timer_opts timer_pmtmr = {
+ .name = "pmtmr",
+ .init = init_pmtmr,
+ .mark_offset = mark_offset_pmtmr,
+ .get_offset = get_offset_pmtmr,
+ .monotonic_clock = monotonic_clock_pmtmr,
+ .delay = delay_pmtmr,
+};
+
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dominik Brodowski ");
+MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
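Two pieces of timer_pm.c are easy to check in isolation: the triple read that tolerates chipsets which do not latch the counter, and the 286/1024 approximation of the 1/3.579545 us tick period used by cyc2us(). A self-contained user-space sketch (the fake counter below is purely illustrative):

#include <stdio.h>
#include <stdint.h>

#define ACPI_PM_MASK 0xFFFFFFu		/* the PMTMR counter is 24 bits wide */

/* Stand-in for inl(pmtmr_ioport): just a monotonically advancing counter. */
static uint32_t fake_pmtmr_read(void)
{
	static uint32_t counter;
	return counter += 3;
}

/* Re-read until the middle value lies between its neighbours, mirroring
 * read_pmtmr() above. */
static uint32_t read_pmtmr_stable(void)
{
	uint32_t v1, v2, v3;

	do {
		v1 = fake_pmtmr_read();
		v2 = fake_pmtmr_read();
		v3 = fake_pmtmr_read();
	} while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
		 || (v3 > v1 && v3 < v2));

	return v2 & ACPI_PM_MASK;
}

/* One tick is 1/3.579545 us, approximated as 286/1024 (error about 0.024%).
 * Safe for the small per-tick deltas this is used on. */
static uint32_t cyc2us(uint32_t cycles)
{
	return (cycles * 286) >> 10;
}

int main(void)
{
	uint32_t last = read_pmtmr_stable();
	uint32_t now = read_pmtmr_stable();
	uint32_t delta = (now - last) & ACPI_PM_MASK;	/* 24-bit wraparound */

	printf("elapsed: %u ticks = %u us\n", delta, cyc2us(delta));
	return 0;
}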
--- linux-2.6.3/arch/i386/kernel/traps.c 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/kernel/traps.c 2004-02-20 00:21:54.000000000 -0800
@@ -54,12 +54,8 @@
#include "mach_traps.h"
-asmlinkage int system_call(void);
-asmlinkage void lcall7(void);
-asmlinkage void lcall27(void);
-
-struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
- { 0, 0 }, { 0, 0 } };
+struct desc_struct default_ldt[] __attribute__((__section__(".data.default_ldt"))) = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } };
+struct page *default_ldt_page;
/* Do we ignore FPU interrupts ? */
char ignore_fpu_irq = 0;
@@ -91,6 +87,43 @@ asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
+#ifdef CONFIG_KGDB
+extern void sysenter_entry(void);
+#include
+#include
+extern void int3(void);
+extern void debug(void);
+void set_intr_gate(unsigned int n, void *addr);
+static void set_intr_usr_gate(unsigned int n, void *addr);
+/*
+ * Should be able to call this breakpoint() very early in
+ * bring up. Just hard code the call where needed.
+ * The breakpoint() code is here because set_?_gate() functions
+ * are local (static) to trap.c. They need be done only once,
+ * but it does not hurt to do them over.
+ */
+void breakpoint(void)
+{
+ init_entry_mappings();
+ set_intr_usr_gate(3,&int3); /* disable ints on trap */
+ set_intr_gate(1,&debug);
+ set_intr_gate(14,&page_fault);
+
+ BREAKPOINT;
+}
+#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \
+ { \
+ if (!user_mode(regs) ) \
+ { \
+ kgdb_handle_exception(trapnr, signr, error_code, regs); \
+ after; \
+ } else if ((trapnr == 3) && (regs->eflags &0x200)) local_irq_enable(); \
+ }
+#else
+#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after)
+#endif
+
+
static int kstack_depth_to_print = 24;
void show_trace(struct task_struct *task, unsigned long * stack)
@@ -119,7 +152,7 @@ void show_trace_task(struct task_struct
unsigned long esp = tsk->thread.esp;
/* User space on another CPU? */
- if ((esp ^ (unsigned long)tsk->thread_info) & (PAGE_MASK<<1))
+ if ((esp ^ (unsigned long)tsk->thread_info) & ~(THREAD_SIZE - 1))
return;
show_trace(tsk, (unsigned long *)esp);
}
@@ -175,8 +208,9 @@ void show_registers(struct pt_regs *regs
ss = regs->xss & 0xffff;
}
print_modules();
- printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx\n",
- smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags);
+ printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx\n",
+ smp_processor_id(), 0xffff & regs->xcs,
+ regs->eip, print_tainted(), regs->eflags);
print_symbol("EIP is at %s\n", regs->eip);
printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
@@ -192,23 +226,27 @@ void show_registers(struct pt_regs *regs
* time of the fault..
*/
if (in_kernel) {
+ u8 *eip;
printk("\nStack: ");
show_stack(NULL, (unsigned long*)esp);
printk("Code: ");
- if(regs->eip < PAGE_OFFSET)
- goto bad;
- for(i=0;i<20;i++)
- {
- unsigned char c;
- if(__get_user(c, &((unsigned char*)regs->eip)[i])) {
-bad:
+ eip = (u8 *)regs->eip - 43;
+ for (i = 0; i < 64; i++, eip++) {
+ unsigned char c = 0xff;
+
+ if ((user_mode(regs) && get_user(c, eip)) ||
+ (!user_mode(regs) && __direct_get_user(c, eip))) {
+
printk(" Bad EIP value.");
break;
}
- printk("%02x ", c);
+ if (eip == (u8 *)regs->eip)
+ printk("<%02x> ", c);
+ else
+ printk("%02x ", c);
}
}
printk("\n");
@@ -255,12 +293,36 @@ spinlock_t die_lock = SPIN_LOCK_UNLOCKED
void die(const char * str, struct pt_regs * regs, long err)
{
static int die_counter;
+ int nl = 0;
console_verbose();
spin_lock_irq(&die_lock);
bust_spinlocks(1);
handle_BUG(regs);
printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+ nl = 1;
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP ");
+ nl = 1;
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC");
+ nl = 1;
+#endif
+ if (nl)
+ printk("\n");
+#ifdef CONFIG_KGDB
+ /* This is about the only place we want to go to kgdb even if in
+ * user mode. But we must go in via a trap so within kgdb we will
+ * always be in kernel mode.
+ */
+ if (user_mode(regs))
+ BREAKPOINT;
+#endif
+ CHK_REMOTE_DEBUG(0,SIGTRAP,err,regs,)
show_registers(regs);
bust_spinlocks(0);
spin_unlock_irq(&die_lock);
@@ -330,6 +392,7 @@ static inline void do_trap(int trapnr, i
#define DO_ERROR(trapnr, signr, str, name) \
asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
{ \
+ CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,)\
do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
}
@@ -347,7 +410,9 @@ asmlinkage void do_##name(struct pt_regs
#define DO_VM86_ERROR(trapnr, signr, str, name) \
asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
{ \
+ CHK_REMOTE_DEBUG(trapnr, signr, error_code,regs, return)\
do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
+ return; \
}
#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
@@ -394,8 +459,10 @@ gp_in_vm86:
return;
gp_in_kernel:
- if (!fixup_exception(regs))
+ if (!fixup_exception(regs)){
+ CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,)
die("general protection fault", regs, error_code);
+ }
}
static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
@@ -534,10 +601,18 @@ asmlinkage void do_debug(struct pt_regs
if (regs->eflags & X86_EFLAGS_IF)
local_irq_enable();
- /* Mask out spurious debug traps due to lazy DR7 setting */
+ /*
+ * Mask out spurious debug traps due to lazy DR7 setting or
+ * due to 4G/4G kernel mode:
+ */
if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
if (!tsk->thread.debugreg[7])
goto clear_dr7;
+ if (!user_mode(regs)) {
+ // restore upon return-to-userspace:
+ set_thread_flag(TIF_DB7);
+ goto clear_dr7;
+ }
}
if (regs->eflags & VM_MASK)
@@ -557,8 +632,18 @@ asmlinkage void do_debug(struct pt_regs
* allowing programs to debug themselves without the ptrace()
* interface.
*/
+#ifdef CONFIG_KGDB
+ /*
+ * I think this is the only "real" case of a TF in the kernel
+ * that really belongs to user space. Others are
+ * "Ours all ours!"
+ */
+ if (((regs->xcs & 3) == 0) && ((void *)regs->eip == sysenter_entry))
+ goto clear_TF_reenable;
+#else
if ((regs->xcs & 3) == 0)
goto clear_TF_reenable;
+#endif
if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
goto clear_TF;
}
@@ -570,6 +655,17 @@ asmlinkage void do_debug(struct pt_regs
info.si_errno = 0;
info.si_code = TRAP_BRKPT;
+#ifdef CONFIG_KGDB
+ /*
+ * If this is a kernel mode trap, we need to reset db7 to allow us
+ * to continue sanely and also skip the signal delivery
+ */
+ if ((regs->xcs & 3) == 0)
+ goto clear_dr7;
+
+ /* if not kernel, allow ints but only if they were on */
+ if ( regs->eflags & 0x200) local_irq_enable();
+#endif
/* If this is a kernel mode trap, save the user PC on entry to
* the kernel, that's what the debugger can make sense of.
*/
@@ -584,6 +680,7 @@ clear_dr7:
__asm__("movl %0,%%db7"
: /* no output */
: "r" (0));
+ CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,)
return;
debug_vm86:
@@ -779,19 +876,53 @@ asmlinkage void math_emulate(long arg)
#endif /* CONFIG_MATH_EMULATION */
-#ifdef CONFIG_X86_F00F_BUG
-void __init trap_init_f00f_bug(void)
+void __init trap_init_virtual_IDT(void)
{
- __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
-
/*
- * Update the IDT descriptor and reload the IDT so that
- * it uses the read-only mapped virtual address.
+ * "idt" is magic - it overlaps the idt_descr
+ * variable so that updating idt will automatically
+ * update the idt descriptor..
*/
- idt_descr.address = fix_to_virt(FIX_F00F_IDT);
+ __set_fixmap(FIX_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
+ idt_descr.address = __fix_to_virt(FIX_IDT);
+
__asm__ __volatile__("lidt %0" : : "m" (idt_descr));
}
+
+void __init trap_init_virtual_GDT(void)
+{
+ int cpu = smp_processor_id();
+ struct Xgt_desc_struct *gdt_desc = cpu_gdt_descr + cpu;
+ struct Xgt_desc_struct tmp_desc = {0, 0};
+ struct tss_struct * t;
+
+ __asm__ __volatile__("sgdt %0": "=m" (tmp_desc): :"memory");
+
+#ifdef CONFIG_X86_HIGH_ENTRY
+ if (!cpu) {
+ __set_fixmap(FIX_GDT_0, __pa(cpu_gdt_table), PAGE_KERNEL);
+ __set_fixmap(FIX_GDT_1, __pa(cpu_gdt_table) + PAGE_SIZE, PAGE_KERNEL);
+ __set_fixmap(FIX_TSS_0, __pa(init_tss), PAGE_KERNEL);
+ __set_fixmap(FIX_TSS_1, __pa(init_tss) + 1*PAGE_SIZE, PAGE_KERNEL);
+ __set_fixmap(FIX_TSS_2, __pa(init_tss) + 2*PAGE_SIZE, PAGE_KERNEL);
+ __set_fixmap(FIX_TSS_3, __pa(init_tss) + 3*PAGE_SIZE, PAGE_KERNEL);
+ }
+
+ gdt_desc->address = __fix_to_virt(FIX_GDT_0) + sizeof(cpu_gdt_table[0]) * cpu;
+#else
+ gdt_desc->address = (unsigned long)cpu_gdt_table[cpu];
+#endif
+ __asm__ __volatile__("lgdt %0": "=m" (*gdt_desc));
+
+#ifdef CONFIG_X86_HIGH_ENTRY
+ t = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu;
+#else
+ t = init_tss + cpu;
#endif
+ set_tss_desc(cpu, t);
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
+ load_TR_desc();
+}
#define _set_gate(gate_addr,type,dpl,addr,seg) \
do { \
@@ -818,20 +949,26 @@ void set_intr_gate(unsigned int n, void
_set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
}
-static void __init set_trap_gate(unsigned int n, void *addr)
+void __init set_trap_gate(unsigned int n, void *addr)
{
_set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
}
-static void __init set_system_gate(unsigned int n, void *addr)
+void __init set_system_gate(unsigned int n, void *addr)
{
_set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
}
-static void __init set_call_gate(void *a, void *addr)
+void __init set_call_gate(void *a, void *addr)
{
_set_gate(a,12,3,addr,__KERNEL_CS);
}
+#ifdef CONFIG_KGDB
+void set_intr_usr_gate(unsigned int n, void *addr)
+{
+ _set_gate(idt_table+n,14,3,addr,__KERNEL_CS);
+}
+#endif
static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
{
@@ -850,11 +987,16 @@ void __init trap_init(void)
#ifdef CONFIG_X86_LOCAL_APIC
init_apic_mappings();
#endif
+ init_entry_mappings();
set_trap_gate(0,&divide_error);
set_intr_gate(1,&debug);
set_intr_gate(2,&nmi);
+#ifndef CONFIG_KGDB
set_system_gate(3,&int3); /* int3-5 can be called from all */
+#else
+ set_intr_usr_gate(3,&int3); /* int3-5 can be called from all */
+#endif
set_system_gate(4,&overflow);
set_system_gate(5,&bounds);
set_trap_gate(6,&invalid_op);
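The reworked "Code:" dump in show_registers() prints 64 bytes starting 43 bytes before the faulting EIP and brackets the byte at EIP itself, so the faulting instruction stands out in an oops. A minimal user-space sketch of just the formatting (not kernel code):

#include <stdio.h>

/* Print 64 code bytes around 'eip', marking the byte at EIP with <..>. */
static void dump_code(const unsigned char *eip)
{
	const unsigned char *p = eip - 43;
	int i;

	printf("Code: ");
	for (i = 0; i < 64; i++, p++) {
		if (p == eip)
			printf("<%02x> ", *p);
		else
			printf("%02x ", *p);
	}
	printf("\n");
}

int main(void)
{
	static unsigned char text[128];
	int i;

	for (i = 0; i < 128; i++)
		text[i] = (unsigned char)i;

	dump_code(&text[60]);		/* pretend EIP points at text[60] */
	return 0;
}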
--- linux-2.6.3/arch/i386/kernel/vm86.c 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/i386/kernel/vm86.c 2004-02-20 00:21:54.000000000 -0800
@@ -125,7 +125,7 @@ struct pt_regs * save_v86_state(struct k
tss = init_tss + get_cpu();
current->thread.esp0 = current->thread.saved_esp0;
current->thread.sysenter_cs = __KERNEL_CS;
- load_esp0(tss, &current->thread);
+ load_virtual_esp0(tss, current);
current->thread.saved_esp0 = 0;
put_cpu();
@@ -305,7 +305,7 @@ static void do_sys_vm86(struct kernel_vm
tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
if (cpu_has_sep)
tsk->thread.sysenter_cs = 0;
- load_esp0(tss, &tsk->thread);
+ load_virtual_esp0(tss, tsk);
put_cpu();
tsk->thread.screen_bitmap = info->screen_bitmap;
--- linux-2.6.3/arch/i386/kernel/vmlinux.lds.S 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/kernel/vmlinux.lds.S 2004-02-20 00:21:54.000000000 -0800
@@ -3,6 +3,9 @@
*/
#include
+#include
+#include
+#include
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
@@ -10,7 +13,7 @@ ENTRY(startup_32)
jiffies = jiffies_64;
SECTIONS
{
- . = 0xC0000000 + 0x100000;
+ . = __PAGE_OFFSET + 0x100000;
/* read-only */
_text = .; /* Text and read-only data */
.text : {
@@ -19,6 +22,19 @@ SECTIONS
*(.gnu.warning)
} = 0x9090
+#ifdef CONFIG_X86_4G
+ . = ALIGN(PAGE_SIZE_asm);
+ __entry_tramp_start = .;
+ . = FIX_ENTRY_TRAMPOLINE_0_addr;
+ __start___entry_text = .;
+ .entry.text : AT (__entry_tramp_start) { *(.entry.text) }
+ __entry_tramp_end = __entry_tramp_start + SIZEOF(.entry.text);
+ . = __entry_tramp_end;
+ . = ALIGN(PAGE_SIZE_asm);
+#else
+ .entry.text : { *(.entry.text) }
+#endif
+
_etext = .; /* End of text section */
. = ALIGN(16); /* Exception table */
@@ -34,15 +50,12 @@ SECTIONS
CONSTRUCTORS
}
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE_asm);
__nosave_begin = .;
.data_nosave : { *(.data.nosave) }
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE_asm);
__nosave_end = .;
- . = ALIGN(4096);
- .data.page_aligned : { *(.data.idt) }
-
. = ALIGN(32);
.data.cacheline_aligned : { *(.data.cacheline_aligned) }
@@ -52,7 +65,7 @@ SECTIONS
.data.init_task : { *(.data.init_task) }
/* will be freed after init */
- . = ALIGN(4096); /* Init code and data */
+ . = ALIGN(PAGE_SIZE_asm); /* Init code and data */
__init_begin = .;
.init.text : {
_sinittext = .;
@@ -91,7 +104,7 @@ SECTIONS
from .altinstructions and .eh_frame */
.exit.text : { *(.exit.text) }
.exit.data : { *(.exit.data) }
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE_asm);
__initramfs_start = .;
.init.ramfs : { *(.init.ramfs) }
__initramfs_end = .;
@@ -99,10 +112,22 @@ SECTIONS
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE_asm);
__init_end = .;
/* freed after init ends here */
-
+
+ . = ALIGN(PAGE_SIZE_asm);
+ .data.page_aligned_tss : { *(.data.tss) }
+
+ . = ALIGN(PAGE_SIZE_asm);
+ .data.page_aligned_default_ldt : { *(.data.default_ldt) }
+
+ . = ALIGN(PAGE_SIZE_asm);
+ .data.page_aligned_idt : { *(.data.idt) }
+
+ . = ALIGN(PAGE_SIZE_asm);
+ .data.page_aligned_gdt : { *(.data.gdt) }
+
__bss_start = .; /* BSS */
.bss : { *(.bss) }
__bss_stop = .;
@@ -122,4 +147,6 @@ SECTIONS
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
+
+
}
--- linux-2.6.3/arch/i386/kernel/vsyscall.lds 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/kernel/vsyscall.lds 2004-02-20 00:21:54.000000000 -0800
@@ -5,7 +5,7 @@
*/
/* This must match . */
-VSYSCALL_BASE = 0xffffe000;
+VSYSCALL_BASE = 0xffffd000;
SECTIONS
{
--- linux-2.6.3/arch/i386/kernel/vsyscall-sysenter.S 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/kernel/vsyscall-sysenter.S 2004-02-20 00:21:54.000000000 -0800
@@ -7,6 +7,11 @@
.type __kernel_vsyscall,@function
__kernel_vsyscall:
.LSTART_vsyscall:
+ cmpl $192, %eax
+ jne 1f
+ int $0x80
+ ret
+1:
push %ecx
.Lpush_ecx:
push %edx
--- linux-2.6.3/arch/i386/lib/checksum.S 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/lib/checksum.S 2004-02-20 00:21:54.000000000 -0800
@@ -280,14 +280,14 @@ unsigned int csum_partial_copy_generic (
.previous
.align 4
-.globl csum_partial_copy_generic
+.globl direct_csum_partial_copy_generic
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
#define ARGBASE 16
#define FP 12
-csum_partial_copy_generic:
+direct_csum_partial_copy_generic:
subl $4,%esp
pushl %edi
pushl %esi
@@ -422,7 +422,7 @@ DST( movb %cl, (%edi) )
#define ARGBASE 12
-csum_partial_copy_generic:
+direct_csum_partial_copy_generic:
pushl %ebx
pushl %edi
pushl %esi
--- linux-2.6.3/arch/i386/lib/dec_and_lock.c 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/lib/dec_and_lock.c 2004-02-20 00:21:52.000000000 -0800
@@ -10,6 +10,7 @@
#include
#include
+#ifndef ATOMIC_DEC_AND_LOCK
int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
{
int counter;
@@ -38,3 +39,5 @@ slow_path:
spin_unlock(lock);
return 0;
}
+#endif
+
--- linux-2.6.3/arch/i386/lib/getuser.S 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/lib/getuser.S 2004-02-20 00:21:54.000000000 -0800
@@ -9,6 +9,7 @@
* return value.
*/
#include
+#include
/*
@@ -28,7 +29,7 @@
.globl __get_user_1
__get_user_1:
GET_THREAD_INFO(%edx)
- cmpl TI_ADDR_LIMIT(%edx),%eax
+ cmpl TI_addr_limit(%edx),%eax
jae bad_get_user
1: movzbl (%eax),%edx
xorl %eax,%eax
@@ -40,7 +41,7 @@ __get_user_2:
addl $1,%eax
jc bad_get_user
GET_THREAD_INFO(%edx)
- cmpl TI_ADDR_LIMIT(%edx),%eax
+ cmpl TI_addr_limit(%edx),%eax
jae bad_get_user
2: movzwl -1(%eax),%edx
xorl %eax,%eax
@@ -52,7 +53,7 @@ __get_user_4:
addl $3,%eax
jc bad_get_user
GET_THREAD_INFO(%edx)
- cmpl TI_ADDR_LIMIT(%edx),%eax
+ cmpl TI_addr_limit(%edx),%eax
jae bad_get_user
3: movl -3(%eax),%edx
xorl %eax,%eax
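The getuser.S hunks only rename the offset to TI_addr_limit; the underlying check is unchanged: the last byte of the access must not wrap around and must stay below the thread's addr_limit. A rough C restatement of that logic (struct and function names here are made up for illustration):

#include <stdio.h>

#define EFAULT 14

struct fake_thread_info {
	unsigned long addr_limit;	/* stand-in for thread_info->addr_limit */
};

/* Mirror the "addl $size-1, %eax; jc bad; cmpl TI_addr_limit; jae bad" flow. */
static int check_get_user(const struct fake_thread_info *ti,
			  unsigned long addr, unsigned long size)
{
	unsigned long end = addr + size - 1;

	if (end < addr)			/* address wrapped: bad_get_user */
		return -EFAULT;
	if (end >= ti->addr_limit)	/* beyond the user segment: bad */
		return -EFAULT;
	return 0;
}

int main(void)
{
	struct fake_thread_info ti = { .addr_limit = 0xc0000000UL };

	printf("%d\n", check_get_user(&ti, 0x08048000UL, 4));	/* 0, ok */
	printf("%d\n", check_get_user(&ti, 0xbfffffffUL, 4));	/* -EFAULT */
	return 0;
}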
--- /dev/null 2002-08-30 16:31:37.000000000 -0700
+++ 25/arch/i386/lib/kgdb_serial.c 2004-02-20 00:20:42.000000000 -0800
@@ -0,0 +1,499 @@
+/*
+ * Serial interface GDB stub
+ *
+ * Written (hacked together) by David Grothe (dave@gcom.com)
+ * Modified to allow invocation early in boot; see also
+ * kgdb.h for instructions by George Anzinger(george@mvista.com)
+ * Modified to handle debugging over ethernet by Robert Walsh
+ * and wangdi , based on
+ * code by San Mehat.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef CONFIG_KGDB_USER_CONSOLE
+extern void kgdb_console_finit(void);
+#endif
+#define PRNT_off
+#define TEST_EXISTANCE
+#ifdef PRNT
+#define dbprintk(s) printk s
+#else
+#define dbprintk(s)
+#endif
+#define TEST_INTERRUPT_off
+#ifdef TEST_INTERRUPT
+#define intprintk(s) printk s
+#else
+#define intprintk(s)
+#endif
+
+#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT)
+
+#define GDB_BUF_SIZE 512 /* power of 2, please */
+
+static char gdb_buf[GDB_BUF_SIZE];
+static int gdb_buf_in_inx;
+static atomic_t gdb_buf_in_cnt;
+static int gdb_buf_out_inx;
+
+struct async_struct *gdb_async_info;
+static int gdb_async_irq;
+
+#define outb_px(a,b) outb_p(b,a)
+
+static void program_uart(struct async_struct *info);
+static void write_char(struct async_struct *info, int chr);
+/*
+ * Get a byte from the hardware data buffer and return it
+ */
+static int
+read_data_bfr(struct async_struct *info)
+{
+ char it = inb_p(info->port + UART_LSR);
+
+ if (it & UART_LSR_DR)
+ return (inb_p(info->port + UART_RX));
+ /*
+ * If we have a framing error assume somebody messed with
+ * our uart. Reprogram it and send '-' both ways...
+ */
+ if (it & 0xc) {
+ program_uart(info);
+ write_char(info, '-');
+ return ('-');
+ }
+ return (-1);
+
+} /* read_data_bfr */
+
+/*
+ * Get a char if available, return -1 if nothing available.
+ * Empty the receive buffer first, then look at the interface hardware.
+
+ * Locking here is a bit of a problem. We MUST not lock out communication
+ * if we are trying to talk to gdb about a kgdb entry. On the other hand
+ * we can lose chars in the console pass-through if we don't lock. It is also
+ * possible that we could hold the lock or be waiting for it when kgdb
+ * NEEDS to talk. Since kgdb locks down the world, it does not need locks.
+ * We do, of course have possible issues with interrupting a uart operation,
+ * but we will just depend on the uart status to help keep that straight.
+
+ */
+static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED;
+#ifdef CONFIG_SMP
+extern spinlock_t kgdb_spinlock;
+#endif
+
+static int
+read_char(struct async_struct *info)
+{
+ int chr;
+ unsigned long flags;
+ local_irq_save(flags);
+#ifdef CONFIG_SMP
+ if (!spin_is_locked(&kgdb_spinlock)) {
+ spin_lock(&uart_interrupt_lock);
+ }
+#endif
+ if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */
+ chr = gdb_buf[gdb_buf_out_inx++];
+ gdb_buf_out_inx &= (GDB_BUF_SIZE - 1);
+ atomic_dec(&gdb_buf_in_cnt);
+ } else {
+ chr = read_data_bfr(info);
+ }
+#ifdef CONFIG_SMP
+ if (!spin_is_locked(&kgdb_spinlock)) {
+ spin_unlock(&uart_interrupt_lock);
+ }
+#endif
+ local_irq_restore(flags);
+ return (chr);
+}
+
+/*
+ * Wait until the interface can accept a char, then write it.
+ */
+static void
+write_char(struct async_struct *info, int chr)
+{
+ while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ;
+
+ outb_p(chr, info->port + UART_TX);
+
+} /* write_char */
+
+/*
+ * Mostly we don't need a spinlock, but since the console goes
+ * through here with interrupts on, well, we need to catch those
+ * chars.
+ */
+/*
+ * This is the receiver interrupt routine for the GDB stub.
+ * It will receive a limited number of characters of input
+ * from the gdb host machine and save them up in a buffer.
+ *
+ * When the gdb stub routine tty_getDebugChar() is called it
+ * draws characters out of the buffer until it is empty and
+ * then reads directly from the serial port.
+ *
+ * We do not attempt to write chars from the interrupt routine
+ * since the stubs do all of that via tty_putDebugChar() which
+ * writes one byte after waiting for the interface to become
+ * ready.
+ *
+ * The debug stubs like to run with interrupts disabled since,
+ * after all, they run as a consequence of a breakpoint in
+ * the kernel.
+ *
+ * Perhaps someone who knows more about the tty driver than I
+ * care to learn can make this work for any low level serial
+ * driver.
+ */
+static irqreturn_t
+gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct async_struct *info;
+ unsigned long flags;
+
+ info = gdb_async_info;
+ if (!info || !info->tty || irq != gdb_async_irq)
+ return IRQ_NONE;
+
+ local_irq_save(flags);
+ spin_lock(&uart_interrupt_lock);
+ do {
+ int chr = read_data_bfr(info);
+ intprintk(("Debug char on int: %x hex\n", chr));
+ if (chr < 0)
+ continue;
+
+ if (chr == 3) { /* Ctrl-C means remote interrupt */
+ BREAKPOINT;
+ continue;
+ }
+
+ if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) {
+ /* buffer overflow tosses early char */
+ read_char(info);
+ }
+ gdb_buf[gdb_buf_in_inx++] = chr;
+ gdb_buf_in_inx &= (GDB_BUF_SIZE - 1);
+ } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI);
+ spin_unlock(&uart_interrupt_lock);
+ local_irq_restore(flags);
+ return IRQ_HANDLED;
+} /* gdb_interrupt */
+
+/*
+ * Just a NULL routine for testing.
+ */
+void
+gdb_null(void)
+{
+} /* gdb_null */
+
+/* These structure are filled in with values defined in asm/kgdb_local.h
+ */
+static struct serial_state state = SB_STATE;
+static struct async_struct local_info = SB_INFO;
+static int ok_to_enable_ints = 0;
+static void kgdb_enable_ints_now(void);
+
+extern char *kgdb_version;
+/*
+ * Hook an IRQ for KGDB.
+ *
+ * This routine is called from tty_putDebugChar, below.
+ */
+static int ints_disabled = 1;
+int
+gdb_hook_interrupt(struct async_struct *info, int verb)
+{
+ struct serial_state *state = info->state;
+ unsigned long flags;
+ int port;
+#ifdef TEST_EXISTANCE
+ int scratch, scratch2;
+#endif
+
+ /* The above fails if memory management is not set up yet.
+ * Rather than fail the set up, just keep track of the fact
+ * and pick up the interrupt thing later.
+ */
+ gdb_async_info = info;
+ port = gdb_async_info->port;
+ gdb_async_irq = state->irq;
+ if (verb) {
+ printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n",
+ kgdb_version,
+ port,
+ gdb_async_irq, gdb_async_info->state->custom_divisor);
+ }
+ local_irq_save(flags);
+#ifdef TEST_EXISTANCE
+ /* Existence test */
+ /* Should not need all this, but just in case.... */
+
+ scratch = inb_p(port + UART_IER);
+ outb_px(port + UART_IER, 0);
+ outb_px(0xff, 0x080);
+ scratch2 = inb_p(port + UART_IER);
+ outb_px(port + UART_IER, scratch);
+ if (scratch2) {
+ printk
+ ("gdb_hook_interrupt: Could not clear IER, not a UART!\n");
+ local_irq_restore(flags);
+ return 1; /* We failed; there's nothing here */
+ }
+ scratch2 = inb_p(port + UART_LCR);
+ outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */
+ outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */
+ outb_px(port + UART_LCR, 0);
+ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO);
+ scratch = inb_p(port + UART_IIR) >> 6;
+ if (scratch == 1) {
+ printk("gdb_hook_interrupt: Undefined UART type!"
+ " Not a UART! \n");
+ local_irq_restore(flags);
+ return 1;
+ } else {
+ dbprintk(("gdb_hook_interrupt: UART type "
+ "is %d where 0=16450, 2=16550 3=16550A\n", scratch));
+ }
+ scratch = inb_p(port + UART_MCR);
+ outb_px(port + UART_MCR, UART_MCR_LOOP | scratch);
+ outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A);
+ scratch2 = inb_p(port + UART_MSR) & 0xF0;
+ outb_px(port + UART_MCR, scratch);
+ if (scratch2 != 0x90) {
+ printk("gdb_hook_interrupt: "
+ "Loop back test failed! Not a UART!\n");
+ local_irq_restore(flags);
+ return scratch2 + 1000; /* force 0 to fail */
+ }
+#endif /* test existence */
+ program_uart(info);
+ local_irq_restore(flags);
+
+ return (0);
+
+} /* gdb_hook_interrupt */
+
+static void
+program_uart(struct async_struct *info)
+{
+ int port = info->port;
+
+ (void) inb_p(port + UART_RX);
+ outb_px(port + UART_IER, 0);
+
+ (void) inb_p(port + UART_RX); /* serial driver comments say */
+ (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */
+ (void) inb_p(port + UART_MSR);
+ outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB);
+ outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */
+ outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */
+ outb_px(port + UART_MCR, info->MCR);
+
+ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */
+ outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */
+ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */
+ if (!ints_disabled) {
+ intprintk(("KGDB: Sending %d to port %x offset %d\n",
+ gdb_async_info->IER,
+ (int) gdb_async_info->port, UART_IER));
+ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER);
+ }
+ return;
+}
+
+/*
+ * tty_getDebugChar
+ *
+ * This is a GDB stub routine. It waits for a character from the
+ * serial interface and then returns it. If there is no serial
+ * interface connection then it returns a bogus value which will
+ * almost certainly cause the system to hang. In the
+ */
+int kgdb_in_isr = 0;
+int kgdb_in_lsr = 0;
+extern spinlock_t kgdb_spinlock;
+
+/* Caller takes needed protections */
+
+int
+tty_getDebugChar(void)
+{
+ volatile int chr, dum, time, end_time;
+
+ dbprintk(("tty_getDebugChar(port %x): ", gdb_async_info->port));
+
+ if (gdb_async_info == NULL) {
+ gdb_hook_interrupt(&local_info, 0);
+ }
+ /*
+ * This trick says if we wait a very long time and get
+ * no char, return the -1 and let the upper level deal
+ * with it.
+ */
+ rdtsc(dum, time);
+ end_time = time + 2;
+ while (((chr = read_char(gdb_async_info)) == -1) &&
+ (end_time - time) > 0) {
+ rdtsc(dum, time);
+ };
+ /*
+ * This covers our butts if some other code messes with
+ * our uart, hey, it happens :o)
+ */
+ if (chr == -1)
+ program_uart(gdb_async_info);
+
+ dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' '));
+ return (chr);
+
+} /* tty_getDebugChar */
+
+static int count = 3;
+static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED;
+
+static int __init
+kgdb_enable_ints(void)
+{
+ if (kgdboe) {
+ return 0;
+ }
+ if (gdb_async_info == NULL) {
+ gdb_hook_interrupt(&local_info, 1);
+ }
+ ok_to_enable_ints = 1;
+ kgdb_enable_ints_now();
+#ifdef CONFIG_KGDB_USER_CONSOLE
+ kgdb_console_finit();
+#endif
+ return 0;
+}
+
+#ifdef CONFIG_SERIAL_8250
+void shutdown_for_kgdb(struct async_struct *gdb_async_info);
+#endif
+
+#ifdef CONFIG_DISCONTIGMEM
+static inline int kgdb_mem_init_done(void)
+{
+ return highmem_start_page != NULL;
+}
+#else
+static inline int kgdb_mem_init_done(void)
+{
+ return max_mapnr != 0;
+}
+#endif
+
+static void
+kgdb_enable_ints_now(void)
+{
+ if (!spin_trylock(&one_at_atime))
+ return;
+ if (!ints_disabled)
+ goto exit;
+ if (kgdb_mem_init_done() &&
+ ints_disabled) { /* don't try till mem init */
+#ifdef CONFIG_SERIAL_8250
+ /*
+ * The ifdef here allows the system to be configured
+ * without the serial driver.
+ * Don't make it a module, however, as it will steal the port
+ */
+ shutdown_for_kgdb(gdb_async_info);
+#endif
+ ints_disabled = request_irq(gdb_async_info->state->irq,
+ gdb_interrupt,
+ IRQ_T(gdb_async_info),
+ "KGDB-stub", NULL);
+ intprintk(("KGDB: request_irq returned %d\n", ints_disabled));
+ }
+ if (!ints_disabled) {
+ intprintk(("KGDB: Sending %d to port %x offset %d\n",
+ gdb_async_info->IER,
+ (int) gdb_async_info->port, UART_IER));
+ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER);
+ }
+ exit:
+ spin_unlock(&one_at_atime);
+}
+
+/*
+ * tty_putDebugChar
+ *
+ * This is a GDB stub routine. It waits until the interface is ready
+ * to transmit a char and then sends it. If there is no serial
+ * interface connection then it simply returns to its caller, having
+ * pretended to send the char. Caller takes needed protections.
+ */
+void
+tty_putDebugChar(int chr)
+{
+ dbprintk(("tty_putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n",
+ gdb_async_info->port,
+ chr,
+ chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1));
+
+ if (gdb_async_info == NULL) {
+ gdb_hook_interrupt(&local_info, 0);
+ }
+
+ write_char(gdb_async_info, chr); /* this routine will wait */
+ count = (chr == '#') ? 0 : count + 1;
+ if ((count == 2)) { /* try to enable after */
+ if (ints_disabled & ok_to_enable_ints)
+ kgdb_enable_ints_now(); /* try to enable after */
+
+ /* We do this a lot because, well we really want to get these
+ * interrupts. The serial driver will clear these bits when it
+ * initializes the chip. Everything else it does is ok,
+ * but this.
+ */
+ if (!ints_disabled) {
+ outb_px(gdb_async_info->port + UART_IER,
+ gdb_async_info->IER);
+ }
+ }
+
+} /* tty_putDebugChar */
+
+/*
+ * This does nothing for the serial port, since it doesn't buffer.
+ */
+
+void tty_flushDebugChar(void)
+{
+}
+
+module_init(kgdb_enable_ints);
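The gdb_buf handling above relies on GDB_BUF_SIZE being a power of two so the in/out indices wrap with a cheap AND instead of a modulo, which is what the "power of 2, please" comment is about. A standalone sketch of that ring-buffer idiom (not kernel code):

#include <stdio.h>

#define BUF_SIZE 512			/* must be a power of two */

static char buf[BUF_SIZE];
static int in_inx, out_inx, count;

static int put_char(char c)
{
	if (count >= BUF_SIZE)
		return -1;	/* full; kgdb_serial.c drops the oldest char instead */
	buf[in_inx++] = c;
	in_inx &= (BUF_SIZE - 1);	/* wrap without a division */
	count++;
	return 0;
}

static int get_char(void)
{
	int c;

	if (count == 0)
		return -1;	/* empty; the stub then reads the UART directly */
	c = buf[out_inx++];
	out_inx &= (BUF_SIZE - 1);
	count--;
	return c;
}

int main(void)
{
	const char *msg = "$OK#9a";	/* sample GDB remote packet */
	int c;

	while (*msg)
		put_char(*msg++);
	while ((c = get_char()) != -1)
		putchar(c);
	putchar('\n');
	return 0;
}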
--- linux-2.6.3/arch/i386/lib/Makefile 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/lib/Makefile 2004-02-20 00:20:41.000000000 -0800
@@ -9,4 +9,5 @@ lib-y = checksum.o delay.o \
lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
+lib-$(CONFIG_KGDB) += kgdb_serial.o
lib-$(CONFIG_DEBUG_IOVIRT) += iodebug.o
--- linux-2.6.3/arch/i386/lib/usercopy.c 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/i386/lib/usercopy.c 2004-02-20 00:21:54.000000000 -0800
@@ -76,7 +76,7 @@ do { \
* and returns @count.
*/
long
-__strncpy_from_user(char *dst, const char __user *src, long count)
+__direct_strncpy_from_user(char *dst, const char __user *src, long count)
{
long res;
__do_strncpy_from_user(dst, src, count, res);
@@ -102,7 +102,7 @@ __strncpy_from_user(char *dst, const cha
* and returns @count.
*/
long
-strncpy_from_user(char *dst, const char __user *src, long count)
+direct_strncpy_from_user(char *dst, const char __user *src, long count)
{
long res = -EFAULT;
if (access_ok(VERIFY_READ, src, 1))
@@ -147,7 +147,7 @@ do { \
* On success, this will be zero.
*/
unsigned long
-clear_user(void __user *to, unsigned long n)
+direct_clear_user(void __user *to, unsigned long n)
{
might_sleep();
if (access_ok(VERIFY_WRITE, to, n))
@@ -167,7 +167,7 @@ clear_user(void __user *to, unsigned lon
* On success, this will be zero.
*/
unsigned long
-__clear_user(void __user *to, unsigned long n)
+__direct_clear_user(void __user *to, unsigned long n)
{
__do_clear_user(to, n);
return n;
@@ -184,7 +184,7 @@ __clear_user(void __user *to, unsigned l
* On exception, returns 0.
* If the string is too long, returns a value greater than @n.
*/
-long strnlen_user(const char __user *s, long n)
+long direct_strnlen_user(const char __user *s, long n)
{
unsigned long mask = -__addr_ok(s);
unsigned long res, tmp;
@@ -575,3 +575,4 @@ unsigned long __copy_from_user_ll(void *
n = __copy_user_zeroing_intel(to, (const void *) from, n);
return n;
}
+
--- linux-2.6.3/arch/i386/Makefile 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/Makefile 2004-02-20 00:20:58.000000000 -0800
@@ -19,7 +19,7 @@ LDFLAGS := -m elf_i386
OBJCOPYFLAGS := -O binary -R .note -R .comment -S
LDFLAGS_vmlinux :=
-CFLAGS += -pipe
+CFLAGS += -pipe -msoft-float
# prevent gcc from keeping the stack 16 byte aligned
CFLAGS += $(call check_gcc,-mpreferred-stack-boundary=2,)
@@ -34,8 +34,9 @@ cflags-$(CONFIG_M586MMX) += $(call check
cflags-$(CONFIG_M686) += -march=i686
cflags-$(CONFIG_MPENTIUMII) += $(call check_gcc,-march=pentium2,-march=i686)
cflags-$(CONFIG_MPENTIUMIII) += $(call check_gcc,-march=pentium3,-march=i686)
+cflags-$(CONFIG_MPENTIUMM) += $(call check_gcc,-march=pentium3,-march=i686)
cflags-$(CONFIG_MPENTIUM4) += $(call check_gcc,-march=pentium4,-march=i686)
-cflags-$(CONFIG_MK6) += $(call check_gcc,-march=k6,-march=i586)
+cflags-$(CONFIG_MK6) += -march=k6
# Please note, that patches that add -march=athlon-xp and friends are pointless.
# They make zero difference whatsosever to performance at this time.
cflags-$(CONFIG_MK7) += $(call check_gcc,-march=athlon,-march=i686 $(align)-functions=4)
@@ -47,6 +48,18 @@ cflags-$(CONFIG_MWINCHIP3D) += $(call ch
cflags-$(CONFIG_MCYRIXIII) += $(call check_gcc,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MVIAC3_2) += $(call check_gcc,-march=c3-2,-march=i686)
+# AMD Elan support
+cflags-$(CONFIG_X86_ELAN) += -march=i486
+
+# -mregparm=3 works ok on gcc-3.0 and later
+#
+GCC_VERSION := $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
+cflags-$(CONFIG_REGPARM) += $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;)
+
+# Enable unit-at-a-time mode when possible. It shrinks the
+# kernel considerably.
+CFLAGS += $(call check_gcc,-funit-at-a-time,)
+
CFLAGS += $(cflags-y)
# Default subarch .c files
@@ -84,6 +97,9 @@ mcore-$(CONFIG_X86_ES7000) := mach-es700
# default subarch .h files
mflags-y += -Iinclude/asm-i386/mach-default
+mflags-$(CONFIG_KGDB) += -gdwarf-2
+mflags-$(CONFIG_KGDB_MORE) += $(shell echo $(CONFIG_KGDB_OPTIONS) | sed -e 's/"//g')
+
head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o
libs-y += arch/i386/lib/
--- linux-2.6.3/arch/i386/math-emu/fpu_system.h 2003-11-09 16:45:04.000000000 -0800
+++ 25/arch/i386/math-emu/fpu_system.h 2004-02-20 00:21:54.000000000 -0800
@@ -15,6 +15,7 @@
#include
#include
#include
+#include
/* This sets the pointer FPU_info to point to the argument part
of the stack frame of math_emulate() */
@@ -22,7 +23,7 @@
/* s is always from a cpu register, and the cpu does bounds checking
* during register load --> no further bounds checks needed */
-#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3])
+#define LDT_DESCRIPTOR(s) (((struct desc_struct *)__kmap_atomic_vaddr(KM_LDT_PAGE0))[(s) >> 3])
#define SEG_D_SIZE(x) ((x).b & (3 << 21))
#define SEG_G_BIT(x) ((x).b & (1 << 23))
#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1)
--- linux-2.6.3/arch/i386/mm/fault.c 2003-12-17 21:20:01.000000000 -0800
+++ 25/arch/i386/mm/fault.c 2004-02-20 00:21:54.000000000 -0800
@@ -27,6 +27,7 @@
#include
#include
#include
+#include
extern void die(const char *,struct pt_regs *,long);
@@ -104,8 +105,17 @@ static inline unsigned long get_segment_
if (seg & (1<<2)) {
/* Must lock the LDT while reading it. */
down(&current->mm->context.sem);
+#if 1
+ /* horrible hack for 4/4 disabled kernels.
+ I'm not quite sure what the TLB flush is good for,
+ it's mindlessly copied from the read_ldt code */
+ __flush_tlb_global();
+ desc = kmap(current->mm->context.ldt_pages[(seg&~7)/PAGE_SIZE]);
+ desc = (void *)desc + ((seg & ~7) % PAGE_SIZE);
+#else
desc = current->mm->context.ldt;
desc = (void *)desc + (seg & ~7);
+#endif
} else {
/* Must disable preemption while reading the GDT. */
desc = (u32 *)&cpu_gdt_table[get_cpu()];
@@ -118,6 +128,9 @@ static inline unsigned long get_segment_
(desc[1] & 0xff000000);
if (seg & (1<<2)) {
+#if 1
+ kunmap((void *)((unsigned long)desc & PAGE_MASK));
+#endif
up(&current->mm->context.sem);
} else
put_cpu();
@@ -243,6 +256,19 @@ asmlinkage void do_page_fault(struct pt_
* (error_code & 4) == 0, and that the fault was not a
* protection error (error_code & 1) == 0.
*/
+#ifdef CONFIG_X86_4G
+ /*
+ * On 4/4 all kernels faults are either bugs, vmalloc or prefetch
+ */
+ if (unlikely((regs->xcs & 3) == 0)) {
+ if (error_code & 3)
+ goto bad_area_nosemaphore;
+
+ /* If it's vm86 fall through */
+ if (!(regs->eflags & VM_MASK))
+ goto vmalloc_fault;
+ }
+#else
if (unlikely(address >= TASK_SIZE)) {
if (!(error_code & 5))
goto vmalloc_fault;
@@ -252,6 +278,7 @@ asmlinkage void do_page_fault(struct pt_
*/
goto bad_area_nosemaphore;
}
+#endif
mm = tsk->mm;
@@ -403,6 +430,12 @@ no_context:
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
+#ifdef CONFIG_KGDB
+ if (!user_mode(regs)){
+ kgdb_handle_exception(14,SIGBUS, error_code, regs);
+ return;
+ }
+#endif
bust_spinlocks(1);
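The CONFIG_X86_4G branch added to do_page_fault() keys off the i386 page-fault error code, where bit 0 means a protection violation, bit 1 a write and bit 2 a user-mode access. With error_code & 3 non-zero the kernel-mode fault is treated as a bug; only a not-present kernel read is allowed to fall through to vmalloc_fault. A small sketch of that decision table (plain user-space C):

#include <stdio.h>

/* Classify a kernel-mode page fault the way the CONFIG_X86_4G branch does. */
static const char *classify_kernel_fault(unsigned long error_code)
{
	if (error_code & 4)
		return "user-mode fault (handled elsewhere)";
	if (error_code & 3)
		return "protection violation or write: bad_area_nosemaphore";
	return "not-present kernel read: try vmalloc_fault";
}

int main(void)
{
	unsigned long codes[] = { 0, 1, 2, 3 };
	unsigned int i;

	for (i = 0; i < sizeof(codes) / sizeof(codes[0]); i++)
		printf("error_code=%lu -> %s\n",
		       codes[i], classify_kernel_fault(codes[i]));
	return 0;
}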
--- linux-2.6.3/arch/i386/mm/hugetlbpage.c 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/i386/mm/hugetlbpage.c 2004-02-20 00:21:35.000000000 -0800
@@ -61,6 +61,27 @@ static struct page *alloc_fresh_huge_pag
static void free_huge_page(struct page *page);
+#ifdef CONFIG_NUMA
+
+static inline void huge_inc_rss(struct mm_struct *mm, struct page *page)
+{
+ mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ mm->pernode_rss[page_nodenum(page)] += (HPAGE_SIZE / PAGE_SIZE);
+}
+
+static inline void huge_dec_rss(struct mm_struct *mm, struct page *page)
+{
+ mm->rss -= (HPAGE_SIZE / PAGE_SIZE);
+ mm->pernode_rss[page_nodenum(page)] -= (HPAGE_SIZE / PAGE_SIZE);
+}
+
+#else /* !CONFIG_NUMA */
+
+#define huge_inc_rss(mm, page) ((mm)->rss += (HPAGE_SIZE / PAGE_SIZE))
+#define huge_dec_rss(mm, page) ((mm)->rss -= (HPAGE_SIZE / PAGE_SIZE))
+
+#endif /* CONFIG_NUMA */
+
static struct page *alloc_hugetlb_page(void)
{
int i;
@@ -105,7 +126,7 @@ static void set_huge_pte(struct mm_struc
{
pte_t entry;
- mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ huge_inc_rss(mm, page);
if (write_access) {
entry =
pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
@@ -145,7 +166,7 @@ int copy_hugetlb_page_range(struct mm_st
ptepage = pte_page(entry);
get_page(ptepage);
set_pte(dst_pte, entry);
- dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ huge_inc_rss(dst, ptepage);
addr += HPAGE_SIZE;
}
return 0;
@@ -314,8 +335,8 @@ void unmap_hugepage_range(struct vm_area
page = pte_page(*pte);
huge_page_release(page);
pte_clear(pte);
+ huge_dec_rss(mm, page);
}
- mm->rss -= (end - start) >> PAGE_SHIFT;
flush_tlb_range(vma, start, end);
}
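huge_inc_rss()/huge_dec_rss() above simply account each huge page as HPAGE_SIZE / PAGE_SIZE small pages, and with CONFIG_NUMA the same amount also goes into a per-node counter keyed by the page's node. A toy version of that bookkeeping (assuming 4 MB huge pages as on non-PAE i386; all names here are illustrative, not kernel code):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define HPAGE_SIZE	(4UL * 1024 * 1024)
#define MAX_NODES	4

struct fake_mm {
	unsigned long rss;
	unsigned long pernode_rss[MAX_NODES];
};

static void huge_inc_rss(struct fake_mm *mm, int node)
{
	mm->rss += HPAGE_SIZE / PAGE_SIZE;
	mm->pernode_rss[node] += HPAGE_SIZE / PAGE_SIZE;
}

static void huge_dec_rss(struct fake_mm *mm, int node)
{
	mm->rss -= HPAGE_SIZE / PAGE_SIZE;
	mm->pernode_rss[node] -= HPAGE_SIZE / PAGE_SIZE;
}

int main(void)
{
	struct fake_mm mm = { 0 };

	huge_inc_rss(&mm, 1);		/* map two huge pages on node 1 */
	huge_inc_rss(&mm, 1);
	huge_dec_rss(&mm, 1);		/* unmap one again */

	printf("rss=%lu pages, node1=%lu pages\n",
	       mm.rss, mm.pernode_rss[1]);
	return 0;
}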
--- linux-2.6.3/arch/i386/mm/init.c 2004-01-09 00:04:30.000000000 -0800
+++ 25/arch/i386/mm/init.c 2004-02-20 00:21:54.000000000 -0800
@@ -40,125 +40,13 @@
#include
#include
#include
+#include
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
unsigned long highstart_pfn, highend_pfn;
static int do_test_wp_bit(void);
-/*
- * Creates a middle page table and puts a pointer to it in the
- * given global directory entry. This only returns the gd entry
- * in non-PAE compilation mode, since the middle layer is folded.
- */
-static pmd_t * __init one_md_table_init(pgd_t *pgd)
-{
- pmd_t *pmd_table;
-
-#ifdef CONFIG_X86_PAE
- pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
- if (pmd_table != pmd_offset(pgd, 0))
- BUG();
-#else
- pmd_table = pmd_offset(pgd, 0);
-#endif
-
- return pmd_table;
-}
-
-/*
- * Create a page table and place a pointer to it in a middle page
- * directory entry.
- */
-static pte_t * __init one_page_table_init(pmd_t *pmd)
-{
- if (pmd_none(*pmd)) {
- pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
- if (page_table != pte_offset_kernel(pmd, 0))
- BUG();
-
- return page_table;
- }
-
- return pte_offset_kernel(pmd, 0);
-}
-
-/*
- * This function initializes a certain range of kernel virtual memory
- * with new bootmem page tables, everywhere page tables are missing in
- * the given range.
- */
-
-/*
- * NOTE: The pagetables are allocated contiguous on the physical space
- * so we can cache the place of the first one and move around without
- * checking the pgd every time.
- */
-static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
-{
- pgd_t *pgd;
- pmd_t *pmd;
- int pgd_idx, pmd_idx;
- unsigned long vaddr;
-
- vaddr = start;
- pgd_idx = pgd_index(vaddr);
- pmd_idx = pmd_index(vaddr);
- pgd = pgd_base + pgd_idx;
-
- for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
- if (pgd_none(*pgd))
- one_md_table_init(pgd);
-
- pmd = pmd_offset(pgd, vaddr);
- for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
- if (pmd_none(*pmd))
- one_page_table_init(pmd);
-
- vaddr += PMD_SIZE;
- }
- pmd_idx = 0;
- }
-}
-
-/*
- * This maps the physical memory to kernel virtual address space, a total
- * of max_low_pfn pages, by creating page tables starting from address
- * PAGE_OFFSET.
- */
-static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
-{
- unsigned long pfn;
- pgd_t *pgd;
- pmd_t *pmd;
- pte_t *pte;
- int pgd_idx, pmd_idx, pte_ofs;
-
- pgd_idx = pgd_index(PAGE_OFFSET);
- pgd = pgd_base + pgd_idx;
- pfn = 0;
-
- for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
- pmd = one_md_table_init(pgd);
- if (pfn >= max_low_pfn)
- continue;
- for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
- /* Map with big pages if possible, otherwise create normal page tables. */
- if (cpu_has_pse) {
- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
- pfn += PTRS_PER_PTE;
- } else {
- pte = one_page_table_init(pmd);
-
- for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++)
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
- }
- }
- }
-}
-
static inline int page_kills_ppro(unsigned long pagenr)
{
if (pagenr >= 0x70000 && pagenr <= 0x7003F)
@@ -206,11 +94,8 @@ static inline int page_is_ram(unsigned l
return 0;
}
-#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
-pgprot_t kmap_prot;
-EXPORT_SYMBOL(kmap_prot);
EXPORT_SYMBOL(kmap_pte);
#define kmap_get_fixmap_pte(vaddr) \
@@ -218,29 +103,7 @@ EXPORT_SYMBOL(kmap_pte);
void __init kmap_init(void)
{
- unsigned long kmap_vstart;
-
- /* cache the first kmap pte */
- kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
- kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
- kmap_prot = PAGE_KERNEL;
-}
-
-void __init permanent_kmaps_init(pgd_t *pgd_base)
-{
- pgd_t *pgd;
- pmd_t *pmd;
- pte_t *pte;
- unsigned long vaddr;
-
- vaddr = PKMAP_BASE;
- page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
-
- pgd = swapper_pg_dir + pgd_index(vaddr);
- pmd = pmd_offset(pgd, vaddr);
- pte = pte_offset_kernel(pmd, vaddr);
- pkmap_page_table = pte;
+ kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN));
}
void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
@@ -255,6 +118,8 @@ void __init one_highpage_init(struct pag
SetPageReserved(page);
}
+#ifdef CONFIG_HIGHMEM
+
#ifndef CONFIG_DISCONTIGMEM
void __init set_highmem_pages_init(int bad_ppro)
{
@@ -266,12 +131,9 @@ void __init set_highmem_pages_init(int b
#else
extern void set_highmem_pages_init(int);
#endif /* !CONFIG_DISCONTIGMEM */
-
#else
-#define kmap_init() do { } while (0)
-#define permanent_kmaps_init(pgd_base) do { } while (0)
-#define set_highmem_pages_init(bad_ppro) do { } while (0)
-#endif /* CONFIG_HIGHMEM */
+# define set_highmem_pages_init(bad_ppro) do { } while (0)
+#endif
unsigned long __PAGE_KERNEL = _PAGE_KERNEL;
@@ -281,30 +143,125 @@ unsigned long __PAGE_KERNEL = _PAGE_KERN
extern void __init remap_numa_kva(void);
#endif
-static void __init pagetable_init (void)
+static __init void prepare_pagetables(pgd_t *pgd_base, unsigned long address)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_base + pgd_index(address);
+ pmd = pmd_offset(pgd, address);
+ if (!pmd_present(*pmd)) {
+ pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)));
+ }
+}
+
+static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
{
unsigned long vaddr;
- pgd_t *pgd_base = swapper_pg_dir;
+ for (vaddr = start; vaddr != end; vaddr += PAGE_SIZE)
+ prepare_pagetables(pgd_base, vaddr);
+}
+
+void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end)
+{
+ unsigned long vaddr;
+ pgd_t *pgd;
+ int i, j, k;
+ pmd_t *pmd;
+ pte_t *pte, *pte_base;
+
+ pgd = pgd_base;
+
+ for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
+ vaddr = i*PGDIR_SIZE;
+ if (end && (vaddr >= end))
+ break;
+ pmd = pmd_offset(pgd, 0);
+ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+ vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+ if (end && (vaddr >= end))
+ break;
+ if (vaddr < start)
+ continue;
+ if (cpu_has_pse) {
+ unsigned long __pe;
+
+ set_in_cr4(X86_CR4_PSE);
+ boot_cpu_data.wp_works_ok = 1;
+ __pe = _KERNPG_TABLE + _PAGE_PSE + vaddr - start;
+ /* Make it "global" too if supported */
+ if (cpu_has_pge) {
+ set_in_cr4(X86_CR4_PGE);
+#if !defined(CONFIG_X86_SWITCH_PAGETABLES)
+ __pe += _PAGE_GLOBAL;
+ __PAGE_KERNEL |= _PAGE_GLOBAL;
+#endif
+ }
+ set_pmd(pmd, __pmd(__pe));
+ continue;
+ }
+ if (!pmd_present(*pmd))
+ pte_base = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ else
+ pte_base = (pte_t *) page_address(pmd_page(*pmd));
+ pte = pte_base;
+ for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
+ vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
+ if (end && (vaddr >= end))
+ break;
+ if (vaddr < start)
+ continue;
+ *pte = mk_pte_phys(vaddr-start, PAGE_KERNEL);
+ }
+ set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
+ }
+ }
+}
+
+static void __init pagetable_init (void)
+{
+ unsigned long vaddr, end;
+ pgd_t *pgd_base;
#ifdef CONFIG_X86_PAE
int i;
- /* Init entries of the first-level page table to the zero page */
- for (i = 0; i < PTRS_PER_PGD; i++)
- set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
#endif
- /* Enable PSE if available */
- if (cpu_has_pse) {
- set_in_cr4(X86_CR4_PSE);
- }
+ /*
+ * This can be zero as well - no problem, in that case we exit
+ * the loops anyway due to the PTRS_PER_* conditions.
+ */
+ end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);
- /* Enable PGE if available */
- if (cpu_has_pge) {
- set_in_cr4(X86_CR4_PGE);
- __PAGE_KERNEL |= _PAGE_GLOBAL;
+ pgd_base = swapper_pg_dir;
+#ifdef CONFIG_X86_PAE
+ /*
+ * It causes too many problems if there's no proper pmd set up
+ * for all 4 entries of the PGD - so we allocate all of them.
+ * PAE systems will not miss this extra 4-8K anyway ...
+ */
+ for (i = 0; i < PTRS_PER_PGD; i++) {
+ pmd_t *pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ set_pgd(pgd_base + i, __pgd(__pa(pmd) + 0x1));
}
+#endif
+ /*
+ * Set up lowmem-sized identity mappings at PAGE_OFFSET:
+ */
+ setup_identity_mappings(pgd_base, PAGE_OFFSET, end);
- kernel_physical_mapping_init(pgd_base);
+ /*
+ * Add flat-mode identity-mappings - SMP needs it when
+ * starting up on an AP from real-mode. (In the non-PAE
+ * case we already have these mappings through head.S.)
+ * All user-space mappings are explicitly cleared after
+ * SMP startup.
+ */
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_PAE)
+ setup_identity_mappings(pgd_base, 0, 16*1024*1024);
+#endif
remap_numa_kva();
/*
@@ -312,38 +269,64 @@ static void __init pagetable_init (void)
* created - mappings will be set by set_fixmap():
*/
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
- page_table_range_init(vaddr, 0, pgd_base);
+ fixrange_init(vaddr, 0, pgd_base);
- permanent_kmaps_init(pgd_base);
+#ifdef CONFIG_HIGHMEM
+ {
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
-#ifdef CONFIG_X86_PAE
- /*
- * Add low memory identity-mappings - SMP needs it when
- * starting up on an AP from real-mode. In the non-PAE
- * case we already have these mappings through head.S.
- * All user-space mappings are explicitly cleared after
- * SMP startup.
- */
- pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+ /*
+ * Permanent kmaps:
+ */
+ vaddr = PKMAP_BASE;
+ fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+
+ pgd = swapper_pg_dir + pgd_index(vaddr);
+ pmd = pmd_offset(pgd, vaddr);
+ pte = pte_offset_kernel(pmd, vaddr);
+ pkmap_page_table = pte;
+ }
#endif
}
-void zap_low_mappings (void)
+/*
+ * Clear kernel pagetables in a PMD_SIZE-aligned range.
+ */
+static void clear_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end)
{
- int i;
+ unsigned long vaddr;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ int i, j;
+
+ pgd = pgd_base;
+
+ for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
+ vaddr = i*PGDIR_SIZE;
+ if (end && (vaddr >= end))
+ break;
+ pmd = pmd_offset(pgd, 0);
+ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+ vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+ if (end && (vaddr >= end))
+ break;
+ if (vaddr < start)
+ continue;
+ pmd_clear(pmd);
+ }
+ }
+ flush_tlb_all();
+}
+
+void zap_low_mappings(void)
+{
+ printk("zapping low mappings.\n");
/*
* Zap initial low-memory mappings.
- *
- * Note that "pgd_clear()" doesn't do it for
- * us, because pgd_clear() is a no-op on i386.
*/
- for (i = 0; i < USER_PTRS_PER_PGD; i++)
-#ifdef CONFIG_X86_PAE
- set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
-#else
- set_pgd(swapper_pg_dir+i, __pgd(0));
-#endif
- flush_tlb_all();
+ clear_mappings(swapper_pg_dir, 0, 16*1024*1024);
}
#ifndef CONFIG_DISCONTIGMEM
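
zap_low_mappings() now delegates to clear_mappings(), which clears a PMD_SIZE-aligned range and treats end == 0 as "no upper bound"; the call site asks for the first 16 MB, the flat mappings needed only for real-mode AP startup. As a rough sanity check of the arithmetic, the sketch below counts how many directory entries such a zap touches under the two usual i386 geometries (4 MB entries with the folded pmd, 2 MB entries under PAE); the constants are standard i386 values, not something defined by this patch.

#include <stdio.h>

/* Standard i386 paging geometry (illustrative constants). */
#define MB              (1024UL * 1024UL)
#define PMD_SIZE_NOPAE  (4 * MB)   /* pmd folded into the pgd: 4 MB entries */
#define PMD_SIZE_PAE    (2 * MB)   /* PAE: 512 pmd entries of 2 MB each */

/* How many directory entries does clear_mappings(pgd, 0, end) touch? */
static unsigned long entries_cleared(unsigned long end, unsigned long pmd_size)
{
        return (end + pmd_size - 1) / pmd_size;
}

int main(void)
{
        unsigned long zap = 16 * MB;   /* the range zap_low_mappings() clears */

        printf("non-PAE: %lu entries of 4 MB\n", entries_cleared(zap, PMD_SIZE_NOPAE));
        printf("PAE:     %lu entries of 2 MB\n", entries_cleared(zap, PMD_SIZE_PAE));
        return 0;
}
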
@@ -393,7 +376,15 @@ void __init paging_init(void)
set_in_cr4(X86_CR4_PAE);
#endif
__flush_tlb_all();
-
+ /*
+ * Subtle. SMP is doing its boot stuff late (because it has to
+ * fork idle threads) - but it also needs low mappings for the
+ * protected-mode entry to work. We zap these entries only after
+ * the WP-bit has been tested.
+ */
+#ifndef CONFIG_SMP
+ zap_low_mappings();
+#endif
kmap_init();
zone_sizes_init();
}
@@ -515,22 +506,18 @@ void __init mem_init(void)
if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();
- /*
- * Subtle. SMP is doing it's boot stuff late (because it has to
- * fork idle threads) - but it also needs low mappings for the
- * protected-mode entry to work. We zap these entries only after
- * the WP-bit has been tested.
- */
-#ifndef CONFIG_SMP
- zap_low_mappings();
-#endif
+ entry_trampoline_setup();
+ default_ldt_page = virt_to_page(default_ldt);
+ load_LDT(&init_mm.context);
}
-kmem_cache_t *pgd_cache;
-kmem_cache_t *pmd_cache;
+kmem_cache_t *pgd_cache, *pmd_cache, *kpmd_cache;
void __init pgtable_cache_init(void)
{
+ void (*ctor)(void *, kmem_cache_t *, unsigned long);
+ void (*dtor)(void *, kmem_cache_t *, unsigned long);
+
if (PTRS_PER_PMD > 1) {
pmd_cache = kmem_cache_create("pmd",
PTRS_PER_PMD*sizeof(pmd_t),
@@ -540,13 +527,36 @@ void __init pgtable_cache_init(void)
NULL);
if (!pmd_cache)
panic("pgtable_cache_init(): cannot create pmd cache");
+
+ if (TASK_SIZE > PAGE_OFFSET) {
+ kpmd_cache = kmem_cache_create("kpmd",
+ PTRS_PER_PMD*sizeof(pmd_t),
+ 0,
+ SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
+ kpmd_ctor,
+ NULL);
+ if (!kpmd_cache)
+ panic("pgtable_cache_init(): "
+ "cannot create kpmd cache");
+ }
}
+
+ if (PTRS_PER_PMD == 1 || TASK_SIZE <= PAGE_OFFSET)
+ ctor = pgd_ctor;
+ else
+ ctor = NULL;
+
+ if (PTRS_PER_PMD == 1 && TASK_SIZE <= PAGE_OFFSET)
+ dtor = pgd_dtor;
+ else
+ dtor = NULL;
+
pgd_cache = kmem_cache_create("pgd",
PTRS_PER_PGD*sizeof(pgd_t),
0,
SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
- pgd_ctor,
- PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+ ctor,
+ dtor);
if (!pgd_cache)
panic("pgtable_cache_init(): Cannot create pgd cache");
}
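
The rewritten pgtable_cache_init() derives the pgd cache's constructor and destructor from the two predicates that the comment added to pgtable.c further down spells out: PTRS_PER_PMD > 1 means PAE, and TASK_SIZE > PAGE_OFFSET means the 4G/4G (XKVA) split. The four combinations are easier to audit as a table, which the hypothetical helpers below print; struct config, pgd_ctor_for() and pgd_dtor_for() are made-up names for illustration only.

#include <stdio.h>

/* Hypothetical encoding of the two predicates used by the patch. */
struct config {
        int pae;    /* PTRS_PER_PMD > 1 */
        int xkva;   /* TASK_SIZE > PAGE_OFFSET */
};

static const char *pgd_ctor_for(struct config c)
{
        /* ctor is used unless we are PAE *and* XKVA */
        return (!c.pae || !c.xkva) ? "pgd_ctor" : "NULL";
}

static const char *pgd_dtor_for(struct config c)
{
        /* dtor (pgd_list removal) only exists for plain non-PAE, non-XKVA pgds */
        return (!c.pae && !c.xkva) ? "pgd_dtor" : "NULL";
}

int main(void)
{
        struct config cases[] = { {0,0}, {0,1}, {1,0}, {1,1} };
        int i;

        printf("PAE XKVA  ctor      dtor\n");
        for (i = 0; i < 4; i++)
                printf("  %d    %d  %-9s %s\n", cases[i].pae, cases[i].xkva,
                       pgd_ctor_for(cases[i]), pgd_dtor_for(cases[i]));
        return 0;
}

The kpmd slab, in turn, is only created in the PAE-plus-XKVA case, which is exactly the case where both hooks end up NULL: there the shared kernel mappings are preconstructed per pmd by kpmd_ctor(), so nothing is left for the pgd hooks to do.
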
--- linux-2.6.3/arch/i386/mm/pgtable.c 2003-11-09 16:45:05.000000000 -0800
+++ 25/arch/i386/mm/pgtable.c 2004-02-20 00:21:54.000000000 -0800
@@ -21,6 +21,7 @@
#include
#include
#include
+#include
void show_mem(void)
{
@@ -157,11 +158,20 @@ void pmd_ctor(void *pmd, kmem_cache_t *c
memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
}
+void kpmd_ctor(void *__pmd, kmem_cache_t *cache, unsigned long flags)
+{
+ pmd_t *kpmd, *pmd;
+ kpmd = pmd_offset(&swapper_pg_dir[PTRS_PER_PGD-1],
+ (PTRS_PER_PMD - NR_SHARED_PMDS)*PMD_SIZE);
+ pmd = (pmd_t *)__pmd + (PTRS_PER_PMD - NR_SHARED_PMDS);
+
+ memset(__pmd, 0, (PTRS_PER_PMD - NR_SHARED_PMDS)*sizeof(pmd_t));
+ memcpy(pmd, kpmd, NR_SHARED_PMDS*sizeof(pmd_t));
+}
+
/*
- * List of all pgd's needed for non-PAE so it can invalidate entries
- * in both cached and uncached pgd's; not needed for PAE since the
- * kernel pmd is shared. If PAE were not to share the pmd a similar
- * tactic would be needed. This is essentially codepath-based locking
+ * List of all pgd's needed so it can invalidate entries in both cached
+ * and uncached pgd's. This is essentially codepath-based locking
* against pageattr.c; it is the unique case in which a valid change
* of kernel pagetables can't be lazily synchronized by vmalloc faults.
* vmalloc faults work because attached pagetables are never freed.
@@ -170,30 +180,60 @@ void pmd_ctor(void *pmd, kmem_cache_t *c
* could be used. The locking scheme was chosen on the basis of
* manfred's recommendations and having no core impact whatsoever.
* -- wli
+ *
+ * The entire issue goes away when XKVA is configured.
*/
spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED;
LIST_HEAD(pgd_list);
-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+/*
+ * This is not that hard to figure out.
+ * (a) PTRS_PER_PMD == 1 means non-PAE.
+ * (b) PTRS_PER_PMD > 1 means PAE.
+ * (c) TASK_SIZE > PAGE_OFFSET means XKVA.
+ * (d) TASK_SIZE <= PAGE_OFFSET means non-XKVA.
+ *
+ * Do *NOT* back out the preconstruction like the patch I'm cleaning
+ * up after this very instant did, or at all, for that matter.
+ * This is never called when PTRS_PER_PMD > 1 && TASK_SIZE > PAGE_OFFSET.
+ * -- wli
+ */
+void pgd_ctor(void *__pgd, kmem_cache_t *cache, unsigned long unused)
{
+ pgd_t *pgd = (pgd_t *)__pgd;
unsigned long flags;
- if (PTRS_PER_PMD == 1)
- spin_lock_irqsave(&pgd_lock, flags);
+ if (PTRS_PER_PMD == 1) {
+ if (TASK_SIZE <= PAGE_OFFSET)
+ spin_lock_irqsave(&pgd_lock, flags);
+ else
+ memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS],
+ &swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS],
+ NR_SHARED_PMDS * sizeof(pgd_t));
+ }
- memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
- swapper_pg_dir + USER_PTRS_PER_PGD,
- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+ if (TASK_SIZE <= PAGE_OFFSET)
+ memcpy(pgd + USER_PTRS_PER_PGD,
+ swapper_pg_dir + USER_PTRS_PER_PGD,
+ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
if (PTRS_PER_PMD > 1)
return;
- list_add(&virt_to_page(pgd)->lru, &pgd_list);
- spin_unlock_irqrestore(&pgd_lock, flags);
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+ if (TASK_SIZE > PAGE_OFFSET)
+ memset(pgd, 0, (PTRS_PER_PGD - NR_SHARED_PMDS)*sizeof(pgd_t));
+ else {
+ list_add(&virt_to_page(pgd)->lru, &pgd_list);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+ }
}
-/* never called when PTRS_PER_PMD > 1 */
+/*
+ * Never called when PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET
+ * because with PAE we would list_del() multiple times, and for non-PAE
+ * with XKVA all the AGP pgd shootdown code is unnecessary.
+ */
void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */
@@ -203,6 +243,12 @@ void pgd_dtor(void *pgd, kmem_cache_t *c
spin_unlock_irqrestore(&pgd_lock, flags);
}
+/*
+ * See the comments above pgd_ctor() wrt. preconstruction.
+ * Do *NOT* memcpy() here. If you do, you back out important
+ * anti-cache-pollution code.
+ */
pgd_t *pgd_alloc(struct mm_struct *mm)
{
int i;
@@ -211,15 +257,33 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
if (PTRS_PER_PMD == 1 || !pgd)
return pgd;
+ /*
+ * In the 4G userspace case alias the top 16 MB virtual
+ * memory range into the user mappings as well (these
+ * include the trampoline and CPU data structures).
+ */
for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+ kmem_cache_t *cache;
+ pmd_t *pmd;
+
+ if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
+ cache = kpmd_cache;
+ else
+ cache = pmd_cache;
+
+ pmd = kmem_cache_alloc(cache, GFP_KERNEL);
if (!pmd)
goto out_oom;
set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd))));
}
- return pgd;
+ return pgd;
out_oom:
+ /*
+ * we don't have to handle the kpmd_cache here, since it's the
+ * last allocation: either it fails and there is nothing of it to
+ * free, or it succeeds and the whole operation succeeds.
+ */
for (i--; i >= 0; i--)
kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
kmem_cache_free(pgd_cache, pgd);
@@ -230,10 +294,29 @@ void pgd_free(pgd_t *pgd)
{
int i;
- /* in the PAE case user pgd entries are overwritten before usage */
- if (PTRS_PER_PMD > 1)
- for (i = 0; i < USER_PTRS_PER_PGD; ++i)
- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
/* in the non-PAE case, clear_page_tables() clears user pgd entries */
+ if (PTRS_PER_PMD == 1)
+ goto out_free;
+
+ /* in the PAE case user pgd entries are overwritten before usage */
+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+ kmem_cache_t *cache;
+ pmd_t *pmd = __va(pgd_val(pgd[i]) - 1);
+
+ /*
+ * only userspace pmd's are cleared for us
+ * by mm/memory.c; it's a slab cache invariant
+ * that we must keep the kernel pmd slab separate
+ * at all times, else we'll have bad pmd's.
+ */
+ if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1)
+ cache = kpmd_cache;
+ else
+ cache = pmd_cache;
+
+ kmem_cache_free(cache, pmd);
+ }
+out_free:
kmem_cache_free(pgd_cache, pgd);
}
+
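
kpmd_ctor() preconstructs the single pmd that carries the shared kernel mappings in the 4G/4G layout: the user part is zeroed and the top NR_SHARED_PMDS entries are copied from the kernel pmd hanging off swapper_pg_dir, and pgd_alloc()/pgd_free() then route that last pgd slot through the kpmd slab instead of the plain pmd slab. The sketch below reproduces only the zero-then-copy split on a flat array; PTRS_PER_PMD_DEMO and NR_SHARED_DEMO are deliberately tiny, made-up sizes (real PAE has 512 pmd entries), and pmd_demo_t is not a kernel type.

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define PTRS_PER_PMD_DEMO 8   /* illustrative; PAE really has 512 */
#define NR_SHARED_DEMO    2   /* illustrative stand-in for NR_SHARED_PMDS */

typedef unsigned long pmd_demo_t;

/* Model of kpmd_ctor(): user part zeroed, kernel tail copied from the template. */
static void kpmd_ctor_demo(pmd_demo_t *pmd, const pmd_demo_t *kernel_template)
{
        memset(pmd, 0, (PTRS_PER_PMD_DEMO - NR_SHARED_DEMO) * sizeof(*pmd));
        memcpy(pmd + PTRS_PER_PMD_DEMO - NR_SHARED_DEMO,
               kernel_template + PTRS_PER_PMD_DEMO - NR_SHARED_DEMO,
               NR_SHARED_DEMO * sizeof(*pmd));
}

int main(void)
{
        pmd_demo_t swapper[PTRS_PER_PMD_DEMO] = { 0 };
        pmd_demo_t pmd[PTRS_PER_PMD_DEMO];
        int i;

        /* Pretend the shared kernel tail is already populated in the template. */
        swapper[PTRS_PER_PMD_DEMO - 2] = 0xaaa;
        swapper[PTRS_PER_PMD_DEMO - 1] = 0xbbb;
        memset(pmd, 0xff, sizeof(pmd));         /* garbage from the allocator */

        kpmd_ctor_demo(pmd, swapper);

        for (i = 0; i < PTRS_PER_PMD_DEMO - NR_SHARED_DEMO; i++)
                assert(pmd[i] == 0);            /* user part cleared */
        assert(pmd[PTRS_PER_PMD_DEMO - 1] == 0xbbb);   /* kernel tail shared */
        printf("kpmd preconstruction ok\n");
        return 0;
}

Because that one slot comes from a different slab, pgd_free() has to repeat the same index test when returning the pmds, which is the point of the cache selection loop in the hunk above.
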
--- linux-2.6.3/arch/i386/oprofile/nmi_int.c 2003-09-27 18:57:43.000000000 -0700
+++ 25/arch/i386/oprofile/nmi_int.c 2004-02-20 00:20:36.000000000 -0800
@@ -65,14 +65,14 @@ static int __init init_driverfs(void)
{
int error;
if (!(error = sysdev_class_register(&oprofile_sysclass)))
- error = sys_device_register(&device_oprofile);
+ error = sysdev_register(&device_oprofile);
return error;
}
static void __exit exit_driverfs(void)
{
- sys_device_unregister(&device_oprofile);
+ sysdev_unregister(&device_oprofile);
sysdev_class_unregister(&oprofile_sysclass);
}
@@ -295,8 +295,6 @@ struct oprofile_operations nmi_ops = {
};
-#if !defined(CONFIG_X86_64)
-
static int __init p4_init(void)
{
__u8 cpu_model = current_cpu_data.x86_model;
@@ -335,7 +333,9 @@ static int __init ppro_init(void)
if (cpu_model > 0xd)
return 0;
- if (cpu_model > 5) {
+ if (cpu_model == 9) {
+ nmi_ops.cpu_type = "i386/p6_mobile";
+ } else if (cpu_model > 5) {
nmi_ops.cpu_type = "i386/piii";
} else if (cpu_model > 2) {
nmi_ops.cpu_type = "i386/pii";
@@ -347,9 +347,6 @@ static int __init ppro_init(void)
return 1;
}
-#endif /* !CONFIG_X86_64 */
-
-
/* in order to get driverfs right */
static int using_nmi;
@@ -381,7 +378,6 @@ int __init nmi_init(struct oprofile_oper
}
break;
-#if !defined(CONFIG_X86_64)
case X86_VENDOR_INTEL:
switch (family) {
/* Pentium IV */
@@ -400,7 +396,6 @@ int __init nmi_init(struct oprofile_oper
return -ENODEV;
}
break;
-#endif /* !CONFIG_X86_64 */
default:
return -ENODEV;
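
ppro_init() now recognizes the Pentium M (family 6, model 9) as its own "i386/p6_mobile" counter type instead of lumping it in with the PIII models. Flattening the dispatch makes the ordering of the checks easier to see; the function below merely restates the hunk outside the kernel, and the final "i386/ppro" fallback is assumed from the branch that sits just past the hunk's context.

#include <stdio.h>

/* Mirror of the family-6 dispatch in ppro_init(), as updated by the patch. */
static const char *ppro_cpu_type(unsigned char cpu_model)
{
        if (cpu_model > 0xd)
                return NULL;                /* unknown model: ppro_init() returns 0 */
        if (cpu_model == 9)
                return "i386/p6_mobile";    /* Pentium M */
        if (cpu_model > 5)
                return "i386/piii";
        if (cpu_model > 2)
                return "i386/pii";
        return "i386/ppro";
}

int main(void)
{
        unsigned char models[] = { 1, 3, 6, 9, 0xd, 0xe };
        int i;

        for (i = 0; i < 6; i++) {
                const char *t = ppro_cpu_type(models[i]);
                printf("model 0x%x -> %s\n", models[i], t ? t : "(unsupported)");
        }
        return 0;
}
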
--- linux-2.6.3/arch/i386/oprofile/nmi_timer_int.c 2003-06-22 12:04:43.000000000 -0700
+++ 25/arch/i386/oprofile/nmi_timer_int.c 2004-02-20 00:19:59.000000000 -0800
@@ -48,9 +48,13 @@ static struct oprofile_operations nmi_ti
.cpu_type = "timer"
};
-
int __init nmi_timer_init(struct oprofile_operations ** ops)
{
+ extern int nmi_active;
+
+ if (nmi_active <= 0)
+ return -ENODEV;
+
*ops = &nmi_timer_ops;
printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
return 0;
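
nmi_timer_init() now refuses to register when the NMI watchdog is not active, so oprofile falls back to the ordinary timer interrupt instead of silently installing a profiling source that will never tick. The sketch below shows only the shape of that guard (probe a precondition, return -ENODEV, otherwise publish the ops table); struct ops, timer_init() and watchdog_active are invented names standing in for the real symbols.

#include <errno.h>
#include <stdio.h>

struct ops { const char *cpu_type; };

static struct ops timer_ops = { .cpu_type = "timer" };
static int watchdog_active;          /* stand-in for the exported nmi_active */

/* Shape of nmi_timer_init(): refuse to register when the source can't fire. */
static int timer_init(struct ops **ops)
{
        if (watchdog_active <= 0)
                return -ENODEV;
        *ops = &timer_ops;
        return 0;
}

int main(void)
{
        struct ops *ops = NULL;

        printf("without watchdog: %d\n", timer_init(&ops));   /* -ENODEV */
        watchdog_active = 1;
        printf("with watchdog:    %d (%s)\n", timer_init(&ops), ops->cpu_type);
        return 0;
}
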
--- linux-2.6.3/arch/i386/oprofile/op_model_p4.c 2003-08-22 19:23:40.000000000 -0700
+++ 25/arch/i386/oprofile/op_model_p4.c 2004-02-20 00:20:52.000000000 -0800
@@ -382,11 +382,8 @@ static struct p4_event_binding p4_events
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
- int cpu;
- if (smp_num_siblings > 1) {
- cpu = smp_processor_id();
- return (cpu_sibling_map[cpu] > cpu) ? 0 : 1;
- }
+ int cpu = smp_processor_id();
+ return (cpu != first_cpu(cpu_sibling_map[cpu]));
#endif
return 0;
}
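
get_stagger() no longer special-cases smp_num_siblings; it simply reports whether the current CPU is the first entry in its sibling map, so the first hyperthread of a core gets stagger 0 and the other sibling gets 1, and the two can then be pointed at different halves of the P4 counter/ESCR resources. The model below reduces cpu_sibling_map/first_cpu() to a plain lookup table with an invented four-CPU layout.

#include <stdio.h>

#define NR_CPUS_DEMO 4

/*
 * first_sibling[cpu] plays the role of first_cpu(cpu_sibling_map[cpu]):
 * CPUs 0/2 form one physical core, CPUs 1/3 the other (a common HT layout).
 */
static const int first_sibling[NR_CPUS_DEMO] = { 0, 1, 0, 1 };

/* Model of get_stagger(): non-first siblings use the staggered counter set. */
static int get_stagger(int cpu)
{
        return cpu != first_sibling[cpu];
}

int main(void)
{
        int cpu;

        for (cpu = 0; cpu < NR_CPUS_DEMO; cpu++)
                printf("cpu %d: stagger %d\n", cpu, get_stagger(cpu));
        return 0;
}
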
--- linux-2.6.3/arch/i386/oprofile/op_model_ppro.c 2003-08-22 19:23:40.000000000 -0700
+++ 25/arch/i386/oprofile/op_model_ppro.c 2004-02-20 00:19:59.000000000 -0800
@@ -13,6 +13,7 @@
#include
#include
#include
+#include