--- linux-2.6.3/arch/alpha/kernel/alpha_ksyms.c 2003-06-22 12:04:43.000000000 -0700 +++ 25/arch/alpha/kernel/alpha_ksyms.c 2004-02-17 22:25:29.000000000 -0800 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include --- linux-2.6.3/arch/alpha/kernel/irq.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/alpha/kernel/irq.c 2004-02-17 22:25:41.000000000 -0800 @@ -252,7 +252,7 @@ static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -333,7 +333,7 @@ static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3/arch/alpha/kernel/osf_sys.c 2003-08-08 22:55:10.000000000 -0700 +++ 25/arch/alpha/kernel/osf_sys.c 2004-02-17 22:25:26.000000000 -0800 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,6 @@ #include extern int do_pipe(int *); -extern asmlinkage unsigned long sys_brk(unsigned long); /* * Brk needs to return an error. Still support Linux's brk(0) query idiom, @@ -821,7 +821,6 @@ osf_setsysinfo(unsigned long op, void *b affects all sorts of things, like timeval and itimerval. */ extern struct timezone sys_tz; -extern asmlinkage int sys_utimes(char *, struct timeval *); extern int do_adjtimex(struct timex *); struct timeval32 @@ -1315,8 +1314,6 @@ arch_get_unmapped_area(struct file *filp } #ifdef CONFIG_OSF4_COMPAT -extern ssize_t sys_readv(unsigned long, const struct iovec *, unsigned long); -extern ssize_t sys_writev(unsigned long, const struct iovec *, unsigned long); /* Clear top 32 bits of iov_len in the user's buffer for compatibility with old versions of OSF/1 where iov_len --- linux-2.6.3/arch/alpha/kernel/time.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/alpha/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -503,6 +503,7 @@ do_settimeofday(struct timespec *tv) time_esterror = NTP_PHASE_LIMIT; write_sequnlock_irq(&xtime_lock); + clock_was_set(); return 0; } --- linux-2.6.3/arch/arm26/Kconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm26/Kconfig 2004-02-17 22:06:16.000000000 -0800 @@ -216,11 +216,6 @@ source "drivers/input/Kconfig" source "drivers/char/Kconfig" -config KBDMOUSE - bool - depends on ARCH_ACORN && BUSMOUSE=y - default y - source "drivers/media/Kconfig" source "fs/Kconfig" --- linux-2.6.3/arch/arm26/kernel/armksyms.c 2003-06-14 12:18:07.000000000 -0700 +++ 25/arch/arm26/kernel/armksyms.c 2004-02-17 22:25:25.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -43,14 +44,6 @@ extern void outswb(unsigned int port, co extern void __bad_xchg(volatile void *ptr, int size); /* - * syscalls - */ -extern int sys_write(int, const char *, int); -extern int sys_read(int, char *, int); -extern int sys_lseek(int, off_t, int); -extern int sys_exit(int); - -/* * libgcc functions - functions that are used internally by the * compiler... (prototypes are not correct though, but that * doesn't really matter since they're not versioned). --- linux-2.6.3/arch/arm26/kernel/sys_arm.c 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/arm26/kernel/sys_arm.c 2004-02-17 22:25:25.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -138,7 +139,6 @@ out: * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. */ -extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); struct sel_arg_struct { unsigned long n; --- linux-2.6.3/arch/arm26/kernel/time.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/arm26/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -179,6 +179,7 @@ int do_settimeofday(struct timespec *tv) time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; write_sequnlock_irq(&xtime_lock); + clock_was_set(); return 0; } --- linux-2.6.3/arch/arm/Kconfig 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/arm/Kconfig 2004-02-17 22:25:47.000000000 -0800 @@ -639,6 +639,8 @@ source "drivers/media/Kconfig" source "fs/Kconfig" +source "arch/arm/oprofile/Kconfig" + source "drivers/video/Kconfig" if ARCH_ACORN || ARCH_CLPS7500 || ARCH_TBOX || ARCH_SHARK || ARCH_SA1100 || PCI --- linux-2.6.3/arch/arm/kernel/armksyms.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/arm/kernel/armksyms.c 2004-02-17 22:25:25.000000000 -0800 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -44,14 +45,6 @@ extern void outswb(unsigned int port, co extern void __bad_xchg(volatile void *ptr, int size); /* - * syscalls - */ -extern int sys_write(int, const char *, int); -extern int sys_read(int, char *, int); -extern int sys_lseek(int, off_t, int); -extern int sys_exit(int); - -/* * libgcc functions - functions that are used internally by the * compiler... (prototypes are not correct though, but that * doesn't really matter since they're not versioned). --- linux-2.6.3/arch/arm/kernel/sys_arm.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/arm/kernel/sys_arm.c 2004-02-17 22:25:25.000000000 -0800 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -137,7 +138,6 @@ out: * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. */ -extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); struct sel_arg_struct { unsigned long n; --- linux-2.6.3/arch/arm/kernel/time.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/arm/kernel/time.c 2004-02-17 22:25:47.000000000 -0800 @@ -85,6 +85,9 @@ unsigned long long __attribute__((weak)) */ static inline void do_profile(struct pt_regs *regs) { + + profile_hook(regs); + if (!user_mode(regs) && prof_buffer && current->pid) { --- linux-2.6.3/arch/arm/Makefile 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/arm/Makefile 2004-02-17 22:25:47.000000000 -0800 @@ -116,6 +116,7 @@ endif core-$(CONFIG_FPE_NWFPE) += arch/arm/nwfpe/ core-$(CONFIG_FPE_FASTFPE) += $(FASTFPE_OBJ) +drivers-$(CONFIG_OPROFILE) += arch/arm/oprofile/ drivers-$(CONFIG_ARCH_CLPS7500) += drivers/acorn/char/ drivers-$(CONFIG_ARCH_L7200) += drivers/acorn/char/ --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/arm/oprofile/init.c 2004-02-17 22:25:47.000000000 -0800 @@ -0,0 +1,22 @@ +/** + * @file init.c + * + * @remark Copyright 2004 Oprofile Authors + * + * @author Zwane Mwaikambo + */ + +#include +#include +#include + +int oprofile_arch_init(struct oprofile_operations **ops) +{ + int ret = -ENODEV; + + return ret; +} + +void oprofile_arch_exit(void) +{ +} --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/arm/oprofile/Kconfig 2004-02-17 22:25:47.000000000 -0800 @@ -0,0 +1,23 @@ + +menu "Profiling support" + depends on EXPERIMENTAL + +config PROFILING + bool "Profiling support (EXPERIMENTAL)" + help + Say Y here to enable the extended profiling support mechanisms used + by profilers such as OProfile. + + +config OPROFILE + tristate "OProfile system profiling (EXPERIMENTAL)" + depends on PROFILING + help + OProfile is a profiling system capable of profiling the + whole system, include the kernel, kernel modules, libraries, + and applications. + + If unsure, say N. + +endmenu + --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/arm/oprofile/Makefile 2004-02-17 22:25:47.000000000 -0800 @@ -0,0 +1,9 @@ +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) init.o --- linux-2.6.3/arch/cris/arch-v10/drivers/ethernet.c 2003-07-10 18:50:30.000000000 -0700 +++ 25/arch/cris/arch-v10/drivers/ethernet.c 2004-02-17 22:06:13.000000000 -0800 @@ -482,7 +482,7 @@ etrax_ethernet_init(void) /* Register device */ err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.3/arch/cris/kernel/sys_cris.c 2003-07-10 18:50:30.000000000 -0700 +++ 25/arch/cris/kernel/sys_cris.c 2004-02-17 22:25:28.000000000 -0800 @@ -11,6 +11,7 @@ #include #include +#include #include #include #include --- linux-2.6.3/arch/cris/kernel/time.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/cris/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -108,6 +108,7 @@ int do_settimeofday(struct timespec *tv) time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; local_irq_restore(flags); + clock_was_set(); return 0; } --- linux-2.6.3/arch/h8300/kernel/signal.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/h8300/kernel/signal.c 2004-02-17 22:25:26.000000000 -0800 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -46,8 +47,6 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -asmlinkage long sys_wait4(pid_t pid, unsigned int * stat_addr, int options, - struct rusage * ru); asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs); /* --- linux-2.6.3/arch/h8300/kernel/sys_h8300.c 2003-08-08 22:55:10.000000000 -0700 +++ 25/arch/h8300/kernel/sys_h8300.c 2004-02-17 22:25:30.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -155,8 +156,6 @@ out: } #endif -extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); - struct sel_arg_struct { unsigned long n; fd_set *inp, *outp, *exp; @@ -261,7 +260,7 @@ asmlinkage int sys_ipc (uint call, int f return -EINVAL; } -asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on) +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on) { return -ENOSYS; } --- linux-2.6.3/arch/h8300/kernel/time.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/h8300/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -139,6 +139,7 @@ int do_settimeofday(struct timespec *tv) time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; write_sequnlock_irq(&xtime_lock); + clock_was_set(); return 0; } --- linux-2.6.3/arch/i386/boot/Makefile 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/boot/Makefile 2004-02-17 22:25:44.000000000 -0800 @@ -31,6 +31,8 @@ subdir- := compressed host-progs := tools/build +HOSTCFLAGS_build.o := -Iinclude + # --------------------------------------------------------------------------- $(obj)/zImage: IMAGE_OFFSET := 0x1000 --- linux-2.6.3/arch/i386/boot/setup.S 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/boot/setup.S 2004-02-17 22:26:07.000000000 -0800 @@ -164,7 +164,7 @@ cmd_line_ptr: .long 0 # (Header versio # can be located anywhere in # low memory 0x10000 or higher. -ramdisk_max: .long MAXMEM-1 # (Header version 0x0203 or later) +ramdisk_max: .long __MAXMEM-1 # (Header version 0x0203 or later) # The highest safe address for # the contents of an initrd @@ -776,7 +776,7 @@ end_move_self: # now we are at the r # AMD Elan bug fix by Robert Schwebel. # -#if defined(CONFIG_MELAN) +#if defined(CONFIG_X86_ELAN) movb $0x02, %al # alternate A20 gate outb %al, $0x92 # this works on SC410/SC520 a20_elan_wait: --- linux-2.6.3/arch/i386/Kconfig 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/Kconfig 2004-02-17 22:26:07.000000000 -0800 @@ -43,6 +43,15 @@ config X86_PC help Choose this option if your computer is a standard PC or compatible. +config X86_ELAN + bool "AMD Elan" + help + Select this for an AMD Elan processor. + + Do not use this option for K6/Athlon/Opteron processors! + + If unsure, choose "PC-compatible" instead. + config X86_VOYAGER bool "Voyager (NCR)" help @@ -130,6 +139,8 @@ config ES7000_CLUSTERED_APIC default y depends on SMP && X86_ES7000 && MPENTIUMIII +if !X86_ELAN + choice prompt "Processor family" default M686 @@ -222,14 +233,20 @@ config MPENTIUMIII extended prefetch instructions in addition to the Pentium II extensions. +config MPENTIUMM + bool "Pentium M" + help + Select this for Intel Pentium M (not Pentium-4 M) + notebook chips. + config MPENTIUM4 - bool "Pentium-4/Celeron(P4-based)/Xeon" + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon" help - Select this for Intel Pentium 4 chips. This includes both - the Pentium 4 and P4-based Celeron chips. This option - enables compile flags optimized for the chip, uses the - correct cache shift, and applies any applicable Pentium III - optimizations. + Select this for Intel Pentium 4 chips. This includes the + Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M + (not Pentium M) chips. This option enables compile flags + optimized for the chip, uses the correct cache shift, and + applies any applicable Pentium III optimizations. config MK6 bool "K6/K6-II/K6-III" @@ -312,6 +329,8 @@ config X86_GENERIC when it has moderate overhead. This is intended for generic distributions kernels. +endif + # # Define implied options from the CPU selection here # @@ -328,9 +347,9 @@ config X86_XADD config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || X86_GENERIC - default "4" if MELAN || M486 || M386 + default "4" if X86_ELAN || M486 || M386 default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 - default "6" if MK7 || MK8 + default "6" if MK7 || MK8 || MPENTIUMM config RWSEM_GENERIC_SPINLOCK bool @@ -374,22 +393,22 @@ config X86_POPAD_OK config X86_ALIGNMENT_16 bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 default y config X86_GOOD_APIC bool - depends on MK7 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 + depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 default y config X86_INTEL_USERCOPY bool - depends on MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 default y config X86_USE_PPRO_CHECKSUM bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 default y config X86_USE_3DNOW @@ -402,6 +421,54 @@ config X86_OOSTORE depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR default y +config X86_4G + bool "4 GB kernel-space and 4 GB user-space virtual memory support" + help + This option is only useful for systems that have more than 1 GB + of RAM. + + The default kernel VM layout leaves 1 GB of virtual memory for + kernel-space mappings, and 3 GB of VM for user-space applications. + This option ups both the kernel-space VM and the user-space VM to + 4 GB. + + The cost of this option is additional TLB flushes done at + system-entry points that transition from user-mode into kernel-mode. + I.e. system calls and page faults, and IRQs that interrupt user-mode + code. There's also additional overhead to kernel operations that copy + memory to/from user-space. The overhead from this is hard to tell and + depends on the workload - it can be anything from no visible overhead + to 20-30% overhead. A good rule of thumb is to count with a runtime + overhead of 20%. + + The upside is the much increased kernel-space VM, which more than + quadruples the maximum amount of RAM supported. Kernels compiled with + this option boot on 64GB of RAM and still have more than 3.1 GB of + 'lowmem' left. Another bonus is that highmem IO bouncing decreases, + if used with drivers that still use bounce-buffers. + + There's also a 33% increase in user-space VM size - database + applications might see a boost from this. + + But the cost of the TLB flushes and the runtime overhead has to be + weighed against the bonuses offered by the larger VM spaces. The + dividing line depends on the actual workload - there might be 4 GB + systems that benefit from this option. Systems with less than 4 GB + of RAM will rarely see a benefit from this option - but it's not + out of question, the exact circumstances have to be considered. + +config X86_SWITCH_PAGETABLES + def_bool X86_4G + +config X86_4G_VM_LAYOUT + def_bool X86_4G + +config X86_UACCESS_INDIRECT + def_bool X86_4G + +config X86_HIGH_ENTRY + def_bool X86_4G + config HPET_TIMER bool "HPET Timer Support" help @@ -459,6 +526,16 @@ config NR_CPUS This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on SMP + default off + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with Intel Pentium 4 chips with HyperThreading at a + cost of slightly increased overhead in some places. If unsure say + N here. + config PREEMPT bool "Preemptible Kernel" help @@ -513,7 +590,7 @@ config X86_IO_APIC config X86_TSC bool - depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ + depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ default y config X86_MCE @@ -603,8 +680,6 @@ config MICROCODE To compile this driver as a module, choose M here: the module will be called microcode. - If you use modprobe or kmod you may also want to add the line - 'alias char-major-10-184 microcode' to your /etc/modules.conf file. config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" @@ -701,7 +776,7 @@ config X86_PAE # Common NUMA Features config NUMA bool "Numa Memory Allocation Support" - depends on SMP && HIGHMEM64G && (X86_PC || X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI)) + depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI)) default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT) @@ -809,6 +884,14 @@ config EFI anything about EFI). However, even with this option, the resultant kernel should continue to boot on existing non-EFI platforms. +config IRQBALANCE + bool "Enable kernel irq balancing" + depends on SMP + default y + help + The defalut yes will allow the kernel to do irq load balancing. + Saying no will keep the kernel from doing irq load balancing. + config HAVE_DEC_LOCK bool depends on (SMP || PREEMPT) && X86_CMPXCHG @@ -821,6 +904,19 @@ config BOOT_IOREMAP depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) default y +config REGPARM + bool "Use register arguments (EXPERIMENTAL)" + depends on EXPERIMENTAL + default n + help + Compile the kernel with -mregparm=3. This uses an different ABI + and passes the first three arguments of a function call in registers. + This will probably break binary only modules. + + This feature is only enabled for gcc-3.0 and later - earlier compilers + generate incorrect output with certain kernel constructs when + -mregparm=3 is used. + endmenu @@ -1030,12 +1126,16 @@ config PCI_GOBIOS PCI-based systems don't have any BIOS at all. Linux can also try to detect the PCI hardware directly without using the BIOS. - With this option, you can specify how Linux should detect the PCI - devices. If you choose "BIOS", the BIOS will be used, if you choose - "Direct", the BIOS won't be used, and if you choose "Any", the - kernel will try the direct access method and falls back to the BIOS - if that doesn't work. If unsure, go with the default, which is - "Any". + With this option, you can specify how Linux should detect the + PCI devices. If you choose "BIOS", the BIOS will be used, + if you choose "Direct", the BIOS won't be used, and if you + choose "MMConfig", then PCI Express MMCONFIG will be used. + If you choose "Any", the kernel will try MMCONFIG, then the + direct access method and falls back to the BIOS if that doesn't + work. If unsure, go with the default, which is "Any". + +config PCI_GOMMCONFIG + bool "MMConfig" config PCI_GODIRECT bool "Direct" @@ -1055,6 +1155,12 @@ config PCI_DIRECT depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS) default y +config PCI_MMCONFIG + bool + depends on PCI && (PCI_GOMMCONFIG || PCI_GOANY) + select ACPI_BOOT + default y + config PCI_USE_VECTOR bool "Vector-based interrupt indexing" depends on X86_LOCAL_APIC && X86_IO_APIC @@ -1177,6 +1283,19 @@ config DEBUG_KERNEL Say Y here if you are developing drivers or trying to debug and identify kernel problems. +config EARLY_PRINTK + bool "Early printk" if EMBEDDED + default y + help + Write kernel log output directly into the VGA buffer or to a serial + port. + + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. For normal operation + it is not recommended because it looks ugly and doesn't cooperate + with klogd/syslogd or the X server. You should normally N here, + unless you want to debug such a crash. + config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL @@ -1231,6 +1350,15 @@ config DEBUG_PAGEALLOC This results in a large slowdown, but helps to find certain types of memory corruptions. +config SPINLINE + bool "Spinlock inlining" + depends on DEBUG_KERNEL + help + This will change spinlocks from out of line to inline, making them + account cost to the callers in readprofile, rather than the lock + itself (as ".text.lock.filename"). This can be helpful for finding + the callers of locks. + config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM @@ -1247,20 +1375,208 @@ config DEBUG_INFO Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. +config LOCKMETER + bool "Kernel lock metering" + depends on SMP + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + config DEBUG_SPINLOCK_SLEEP bool "Sleep-inside-spinlock checking" help If you say Y here, various routines which may sleep will become very noisy if they are called with a spinlock held. +config KGDB + bool "Include kgdb kernel debugger" + depends on DEBUG_KERNEL + help + If you say Y here, the system will be compiled with the debug + option (-g) and a debugging stub will be included in the + kernel. This stub communicates with gdb on another (host) + computer via a serial port. The host computer should have + access to the kernel binary file (vmlinux) and a serial port + that is connected to the target machine. Gdb can be made to + configure the serial port or you can use stty and setserial to + do this. See the 'target' command in gdb. This option also + configures in the ability to request a breakpoint early in the + boot process. To request the breakpoint just include 'kgdb' + as a boot option when booting the target machine. The system + will then break as soon as it looks at the boot options. This + option also installs a breakpoint in panic and sends any + kernel faults to the debugger. For more information see the + Documentation/i386/kgdb/kgdb.txt file. + +choice + depends on KGDB + prompt "Debug serial port BAUD" + default KGDB_115200BAUD + help + Gdb and the kernel stub need to agree on the baud rate to be + used. Some systems (x86 family at this writing) allow this to + be configured. + +config KGDB_9600BAUD + bool "9600" + +config KGDB_19200BAUD + bool "19200" + +config KGDB_38400BAUD + bool "38400" + +config KGDB_57600BAUD + bool "57600" + +config KGDB_115200BAUD + bool "115200" +endchoice + +config KGDB_PORT + hex "hex I/O port address of the debug serial port" + depends on KGDB + default 3f8 + help + Some systems (x86 family at this writing) allow the port + address to be configured. The number entered is assumed to be + hex, don't put 0x in front of it. The standard address are: + COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx + will tell you what you have. It is good to test the serial + connection with a live system before trying to debug. + +config KGDB_IRQ + int "IRQ of the debug serial port" + depends on KGDB + default 4 + help + This is the irq for the debug port. If everything is working + correctly and the kernel has interrupts on a control C to the + port should cause a break into the kernel debug stub. + +config DEBUG_INFO + bool + depends on KGDB + default y + +config KGDB_MORE + bool "Add any additional compile options" + depends on KGDB + default n + help + Saying yes here turns on the ability to enter additional + compile options. + + +config KGDB_OPTIONS + depends on KGDB_MORE + string "Additional compile arguments" + default "-O1" + help + This option allows you enter additional compile options for + the whole kernel compile. Each platform will have a default + that seems right for it. For example on PPC "-ggdb -O1", and + for i386 "-O1". Note that by configuring KGDB "-g" is already + turned on. In addition, on i386 platforms + "-fomit-frame-pointer" is deleted from the standard compile + options. + +config NO_KGDB_CPUS + int "Number of CPUs" + depends on KGDB && SMP + default NR_CPUS + help + + This option sets the number of cpus for kgdb ONLY. It is used + to prune some internal structures so they look "nice" when + displayed with gdb. This is to overcome possibly larger + numbers that may have been entered above. Enter the real + number to get nice clean kgdb_info displays. + +config KGDB_TS + bool "Enable kgdb time stamp macros?" + depends on KGDB + default n + help + Kgdb event macros allow you to instrument your code with calls + to the kgdb event recording function. The event log may be + examined with gdb at a break point. Turning on this + capability also allows you to choose how many events to + keep. Kgdb always keeps the lastest events. + +choice + depends on KGDB_TS + prompt "Max number of time stamps to save?" + default KGDB_TS_128 + +config KGDB_TS_64 + bool "64" + +config KGDB_TS_128 + bool "128" + +config KGDB_TS_256 + bool "256" + +config KGDB_TS_512 + bool "512" + +config KGDB_TS_1024 + bool "1024" + +endchoice + +config STACK_OVERFLOW_TEST + bool "Turn on kernel stack overflow testing?" + depends on KGDB + default n + help + This option enables code in the front line interrupt handlers + to check for kernel stack overflow on interrupts and system + calls. This is part of the kgdb code on x86 systems. + +config KGDB_CONSOLE + bool "Enable serial console thru kgdb port" + depends on KGDB + default n + help + This option enables the command line "console=kgdb" option. + When the system is booted with this option in the command line + all kernel printk output is sent to gdb (as well as to other + consoles). For this to work gdb must be connected. For this + reason, this command line option will generate a breakpoint if + gdb has not yet connected. After the gdb continue command is + given all pent up console output will be printed by gdb on the + host machine. Neither this option, nor KGDB require the + serial driver to be configured. + +config KGDB_SYSRQ + bool "Turn on SysRq 'G' command to do a break?" + depends on KGDB + default y + help + This option includes an option in the SysRq code that allows + you to enter SysRq G which generates a breakpoint to the KGDB + stub. This will work if the keyboard is alive and can + interrupt the system. Because of constraints on when the + serial port interrupt can be enabled, this code may allow you + to interrupt the system before the serial port control C is + available. Just say yes here. + config FRAME_POINTER bool "Compile the kernel with frame pointers" + default KGDB help If you say Y here the resulting kernel image will be slightly larger and slower, but it will give very useful debugging information. If you don't debug the kernel, you can say N, but we may not be able to solve problems without frame pointers. +config MAGIC_SYSRQ + bool + depends on KGDB_SYSRQ + default y + config X86_FIND_SMP_CONFIG bool depends on X86_LOCAL_APIC || X86_VOYAGER --- linux-2.6.3/arch/i386/kernel/acpi/boot.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/kernel/acpi/boot.c 2004-02-17 22:26:07.000000000 -0800 @@ -96,6 +96,31 @@ char *__acpi_map_table(unsigned long phy } +#ifdef CONFIG_PCI_MMCONFIG +static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size) +{ + struct acpi_table_mcfg *mcfg; + + if (!phys_addr || !size) + return -EINVAL; + + mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size); + if (!mcfg) { + printk(KERN_WARNING PREFIX "Unable to map MCFG\n"); + return -ENODEV; + } + + if (mcfg->base_reserved) { + printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n"); + return -ENODEV; + } + + pci_mmcfg_base_addr = mcfg->base_address; + + return 0; +} +#endif /* CONFIG_PCI_MMCONFIG */ + #ifdef CONFIG_X86_LOCAL_APIC static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; @@ -339,7 +364,7 @@ acpi_scan_rsdp ( * RSDP signature. */ for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) + if (strncmp((char *) __va(start + offset), "RSD PTR ", sig_len)) continue; return (start + offset); } @@ -376,6 +401,37 @@ static int __init acpi_parse_hpet(unsign } #endif +/* detect the location of the ACPI PM Timer */ +#ifdef CONFIG_X86_PM_TIMER +extern u32 pmtmr_ioport; + +static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) +{ + struct fadt_descriptor_rev2 *fadt =0; + + fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size); + if(!fadt) { + printk(KERN_WARNING PREFIX "Unable to map FADT\n"); + return 0; + } + + if (fadt->revision >= FADT2_REVISION_ID) { + /* FADT rev. 2 */ + if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO) + return 0; + + pmtmr_ioport = fadt->xpm_tmr_blk.address; + } else { + /* FADT rev. 1 */ + pmtmr_ioport = fadt->V1_pm_tmr_blk; + } + if (pmtmr_ioport) + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport); + return 0; +} +#endif + + unsigned long __init acpi_find_rsdp (void) { @@ -398,55 +454,14 @@ acpi_find_rsdp (void) return rsdp_phys; } -/* - * acpi_boot_init() - * called from setup_arch(), always. - * 1. maps ACPI tables for later use - * 2. enumerates lapics - * 3. enumerates io-apics - * - * side effects: - * acpi_lapic = 1 if LAPIC found - * acpi_ioapic = 1 if IOAPIC found - * if (acpi_lapic && acpi_ioapic) smp_found_config = 1; - * if acpi_blacklisted() acpi_disabled = 1; - * acpi_irq_model=... - * ... - * - * return value: (currently ignored) - * 0: success - * !0: failure - */ -int __init -acpi_boot_init (void) +static int acpi_apic_setup(void) { - int result = 0; + int result; - if (acpi_disabled && !acpi_ht) - return 1; - - /* - * The default interrupt routing model is PIC (8259). This gets - * overriden if IOAPICs are enumerated (below). - */ - acpi_irq_model = ACPI_IRQ_MODEL_PIC; - - /* - * Initialize the ACPI boot-time table parser. - */ - result = acpi_table_init(); - if (result) { - acpi_disabled = 1; - return result; - } - - result = acpi_blacklisted(); - if (result) { - printk(KERN_WARNING PREFIX "BIOS listed in blacklist, disabling ACPI support\n"); - acpi_disabled = 1; - return result; - } +#ifdef CONFIG_X86_PM_TIMER + acpi_table_parse(ACPI_FADT, acpi_parse_fadt); +#endif #ifdef CONFIG_X86_LOCAL_APIC @@ -506,24 +521,17 @@ acpi_boot_init (void) acpi_lapic = 1; -#endif /*CONFIG_X86_LOCAL_APIC*/ +#endif /* CONFIG_X86_LOCAL_APIC */ #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) /* * I/O APIC - * -------- */ - /* - * ACPI interpreter is required to complete interrupt setup, - * so if it is off, don't enumerate the io-apics with ACPI. - * If MPS is present, it will handle them, - * otherwise the system will stay in PIC mode - */ - if (acpi_disabled || acpi_noirq) { + if (acpi_noirq) { return 1; - } + } /* * if "noapic" boot option, don't look for IO-APICs @@ -538,8 +546,7 @@ acpi_boot_init (void) if (!result) { printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); return -ENODEV; - } - else if (result < 0) { + } else if (result < 0) { printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); return result; } @@ -569,16 +576,87 @@ acpi_boot_init (void) #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */ -#ifdef CONFIG_X86_LOCAL_APIC if (acpi_lapic && acpi_ioapic) { smp_found_config = 1; clustered_apic_check(); } -#endif + + return 0; +} + +/* + * acpi_boot_init() + * called from setup_arch(), always. + * 1. maps ACPI tables for later use + * 2. enumerates lapics + * 3. enumerates io-apics + * + * side effects: + * acpi_lapic = 1 if LAPIC found + * acpi_ioapic = 1 if IOAPIC found + * if (acpi_lapic && acpi_ioapic) smp_found_config = 1; + * if acpi_blacklisted() acpi_disabled = 1; + * acpi_irq_model=... + * ... + * + * return value: (currently ignored) + * 0: success + * !0: failure + */ + +int __init +acpi_boot_init (void) +{ + int result, error; + + if (acpi_disabled && !acpi_ht) + return 1; + + /* + * The default interrupt routing model is PIC (8259). This gets + * overriden if IOAPICs are enumerated (below). + */ + acpi_irq_model = ACPI_IRQ_MODEL_PIC; + + /* + * Initialize the ACPI boot-time table parser. + */ + result = acpi_table_init(); + if (result) { + acpi_disabled = 1; + return result; + } + + result = acpi_blacklisted(); + if (result) { + printk(KERN_WARNING PREFIX "BIOS listed in blacklist, disabling ACPI support\n"); + acpi_disabled = 1; + return result; + } + + error = acpi_apic_setup(); + +#ifdef CONFIG_PCI_MMCONFIG + result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg); + if (result < 0) { + printk(KERN_ERR PREFIX "Error %d parsing MCFG\n", result); + if (!error) + error = result; + } else if (result > 1) { + printk(KERN_WARNING PREFIX "Multiple MCFG tables exist\n"); + } +#endif /* CONFIG_PCI_MMCONFIG */ #ifdef CONFIG_HPET_TIMER - acpi_table_parse(ACPI_HPET, acpi_parse_hpet); + result = acpi_table_parse(ACPI_HPET, acpi_parse_hpet); + if (result < 0) { + printk(KERN_ERR PREFIX "Error %d parsing HPET\n", result); + if (!error) + error = result; + } else if (result > 1) { + printk(KERN_WARNING PREFIX "Multiple HPET tables exist\n"); + } #endif - return 0; + return error; } --- linux-2.6.3/arch/i386/kernel/asm-offsets.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/asm-offsets.c 2004-02-17 22:26:07.000000000 -0800 @@ -4,9 +4,11 @@ * to extract and format the required data. */ +#include #include #include #include "sigframe.h" +#include #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -28,4 +30,17 @@ void foo(void) DEFINE(RT_SIGFRAME_sigcontext, offsetof (struct rt_sigframe, uc.uc_mcontext)); + DEFINE(TI_task, offsetof (struct thread_info, task)); + DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain)); + DEFINE(TI_flags, offsetof (struct thread_info, flags)); + DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count)); + DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit)); + DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack)); + DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack)); + DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd)); + + DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr, __fix_to_virt(FIX_ENTRY_TRAMPOLINE_0)); + DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL)); + DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(task_thread_db7, offsetof (struct task_struct, thread.debugreg[7])); } --- linux-2.6.3/arch/i386/kernel/cpu/centaur.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/i386/kernel/cpu/centaur.c 2004-02-17 22:25:20.000000000 -0800 @@ -246,7 +246,15 @@ static void __init winchip2_protect_mcr( lo&=~0x1C0; /* blank bits 8-6 */ wrmsr(MSR_IDT_MCR_CTRL, lo, hi); } -#endif +#endif /* CONFIG_X86_OOSTORE */ + +#define ACE_PRESENT (1 << 6) +#define ACE_ENABLED (1 << 7) +#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ + +#define RNG_PRESENT (1 << 2) +#define RNG_ENABLED (1 << 3) +#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ static void __init init_c3(struct cpuinfo_x86 *c) { @@ -254,6 +262,24 @@ static void __init init_c3(struct cpuinf /* Test for Centaur Extended Feature Flags presence */ if (cpuid_eax(0xC0000000) >= 0xC0000001) { + u32 tmp = cpuid_edx(0xC0000001); + + /* enable ACE unit, if present and disabled */ + if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { + rdmsr (MSR_VIA_FCR, lo, hi); + lo |= ACE_FCR; /* enable ACE unit */ + wrmsr (MSR_VIA_FCR, lo, hi); + printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); + } + + /* enable RNG unit, if present and disabled */ + if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { + rdmsr (MSR_VIA_RNG, lo, hi); + lo |= RNG_ENABLE; /* enable RNG unit */ + wrmsr (MSR_VIA_RNG, lo, hi); + printk(KERN_INFO "CPU: Enabled h/w RNG\n"); + } + /* store Centaur Extended Feature Flags as * word 5 of the CPU capability bit array */ --- linux-2.6.3/arch/i386/kernel/cpu/common.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/cpu/common.c 2004-02-17 22:26:07.000000000 -0800 @@ -514,12 +514,16 @@ void __init cpu_init (void) set_tss_desc(cpu,t); cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); - load_LDT(&init_mm.context); + if (cpu) + load_LDT(&init_mm.context); /* Set up doublefault TSS pointer in the GDT */ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff; + if (cpu) + trap_init_virtual_GDT(); + /* Clear %fs and %gs. */ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); --- linux-2.6.3/arch/i386/kernel/cpu/cpufreq/Kconfig 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/kernel/cpu/cpufreq/Kconfig 2004-02-17 22:25:01.000000000 -0800 @@ -54,7 +54,7 @@ config X86_ACPI_CPUFREQ_PROC_INTF config ELAN_CPUFREQ tristate "AMD Elan" - depends on CPU_FREQ_TABLE && MELAN + depends on CPU_FREQ_TABLE && X86_ELAN ---help--- This adds the CPUFreq driver for AMD Elan SC400 and SC410 processors. --- linux-2.6.3/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-02-17 22:09:02.000000000 -0800 @@ -57,8 +57,7 @@ static int cpufreq_p4_setdc(unsigned int u32 l, h; cpumask_t cpus_allowed, affected_cpu_map; struct cpufreq_freqs freqs; - int hyperthreading = 0; - int sibling = 0; + int j; if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) @@ -68,13 +67,10 @@ static int cpufreq_p4_setdc(unsigned int cpus_allowed = current->cpus_allowed; /* only run on CPU to be set, or on its sibling */ - affected_cpu_map = cpumask_of_cpu(cpu); -#ifdef CONFIG_X86_HT - hyperthreading = ((cpu_has_ht) && (smp_num_siblings == 2)); - if (hyperthreading) { - sibling = cpu_sibling_map[cpu]; - cpu_set(sibling, affected_cpu_map); - } +#ifdef CONFIG_SMP + affected_cpu_map = cpu_sibling_map[cpu]; +#else + affected_cpu_map = cpumask_of_cpu(cpu); #endif set_cpus_allowed(current, affected_cpu_map); BUG_ON(!cpu_isset(smp_processor_id(), affected_cpu_map)); @@ -97,11 +93,11 @@ static int cpufreq_p4_setdc(unsigned int /* notifiers */ freqs.old = stock_freq * l / 8; freqs.new = stock_freq * newstate / 8; - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - if (hyperthreading) { - freqs.cpu = sibling; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + for_each_cpu(j) { + if (cpu_isset(j, affected_cpu_map)) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } } rdmsr(MSR_IA32_THERM_STATUS, l, h); @@ -132,10 +128,11 @@ static int cpufreq_p4_setdc(unsigned int set_cpus_allowed(current, cpus_allowed); /* notifiers */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - if (hyperthreading) { - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + for_each_cpu(j) { + if (cpu_isset(j, affected_cpu_map)) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } } return 0; --- linux-2.6.3/arch/i386/kernel/cpu/intel.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/kernel/cpu/intel.c 2004-02-17 22:26:07.000000000 -0800 @@ -10,6 +10,7 @@ #include #include #include +#include #include "cpu.h" @@ -19,8 +20,6 @@ #include #endif -extern int trap_init_f00f_bug(void); - #ifdef CONFIG_X86_INTEL_USERCOPY /* * Alignment at which movsl is preferred for bulk memory copies. @@ -165,7 +164,7 @@ static void __init init_intel(struct cpu c->f00f_bug = 1; if ( !f00f_workaround_enabled ) { - trap_init_f00f_bug(); + trap_init_virtual_IDT(); printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); f00f_workaround_enabled = 1; } @@ -248,6 +247,12 @@ static void __init init_intel(struct cpu /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) clear_bit(X86_FEATURE_SEP, c->x86_capability); + /* + * FIXME: SEP is disabled for 4G/4G for now: + */ +#ifdef CONFIG_X86_HIGH_ENTRY + clear_bit(X86_FEATURE_SEP, c->x86_capability); +#endif /* Names for the Pentium II/Celeron processors detectable only by also checking the cache size. --- linux-2.6.3/arch/i386/kernel/cpu/mcheck/non-fatal.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/kernel/cpu/mcheck/non-fatal.c 2004-02-17 22:25:40.000000000 -0800 @@ -24,8 +24,6 @@ #include "mce.h" -static struct timer_list mce_timer; -static int timerset; static int firstbank; #define MCE_RATE 15*HZ /* timer rate is 15s */ @@ -35,14 +33,15 @@ static void mce_checkregs (void *info) u32 low, high; int i; - preempt_disable(); for (i=firstbank; i 1) - schedule_work (&mce_work); -#endif - mce_timer.expires = jiffies + MCE_RATE; - add_timer (&mce_timer); -} - static int __init init_nonfatal_mce_checker(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -91,17 +80,11 @@ static int __init init_nonfatal_mce_chec else firstbank = 0; - if (timerset == 0) { - /* Set the timer to check for non-fatal - errors every MCE_RATE seconds */ - init_timer (&mce_timer); - mce_timer.expires = jiffies + MCE_RATE; - mce_timer.data = 0; - mce_timer.function = &mce_timerfunc; - add_timer (&mce_timer); - timerset = 1; - printk(KERN_INFO "Machine check exception polling timer started.\n"); - } + /* + * Check for non-fatal errors every MCE_RATE s + */ + schedule_delayed_work(&mce_work, MCE_RATE); + printk(KERN_INFO "Machine check exception polling timer started.\n"); return 0; } module_init(init_nonfatal_mce_checker); --- linux-2.6.3/arch/i386/kernel/cpu/proc.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/i386/kernel/cpu/proc.c 2004-02-17 22:25:20.000000000 -0800 @@ -50,7 +50,7 @@ static int show_cpuinfo(struct seq_file NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "xstore", NULL, NULL, NULL, NULL, NULL, + NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, --- linux-2.6.3/arch/i386/kernel/doublefault.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/doublefault.c 2004-02-17 22:26:07.000000000 -0800 @@ -7,12 +7,13 @@ #include #include #include +#include #define DOUBLEFAULT_STACKSIZE (1024) static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) -#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000) +#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START)) static void doublefault_fn(void) { @@ -38,8 +39,8 @@ static void doublefault_fn(void) printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", t->eax, t->ebx, t->ecx, t->edx); - printk("esi = %08lx, edi = %08lx\n", - t->esi, t->edi); + printk("esi = %08lx, edi = %08lx, ebp = %08lx\n", + t->esi, t->edi, t->ebp); } } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/early_printk.c 2004-02-17 22:08:46.000000000 -0800 @@ -0,0 +1,2 @@ + +#include "../../x86_64/kernel/early_printk.c" --- linux-2.6.3/arch/i386/kernel/edd.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/kernel/edd.c 2004-02-17 22:25:19.000000000 -0800 @@ -134,18 +134,18 @@ edd_show_host_bus(struct edd_device *ede for (i = 0; i < 4; i++) { if (isprint(info->params.host_bus_type[i])) { - p += snprintf(p, left, "%c", info->params.host_bus_type[i]); + p += scnprintf(p, left, "%c", info->params.host_bus_type[i]); } else { - p += snprintf(p, left, " "); + p += scnprintf(p, left, " "); } } if (!strncmp(info->params.host_bus_type, "ISA", 3)) { - p += snprintf(p, left, "\tbase_address: %x\n", + p += scnprintf(p, left, "\tbase_address: %x\n", info->params.interface_path.isa.base_address); } else if (!strncmp(info->params.host_bus_type, "PCIX", 4) || !strncmp(info->params.host_bus_type, "PCI", 3)) { - p += snprintf(p, left, + p += scnprintf(p, left, "\t%02x:%02x.%d channel: %u\n", info->params.interface_path.pci.bus, info->params.interface_path.pci.slot, @@ -154,12 +154,12 @@ edd_show_host_bus(struct edd_device *ede } else if (!strncmp(info->params.host_bus_type, "IBND", 4) || !strncmp(info->params.host_bus_type, "XPRS", 4) || !strncmp(info->params.host_bus_type, "HTPT", 4)) { - p += snprintf(p, left, + p += scnprintf(p, left, "\tTBD: %llx\n", info->params.interface_path.ibnd.reserved); } else { - p += snprintf(p, left, "\tunknown: %llx\n", + p += scnprintf(p, left, "\tunknown: %llx\n", info->params.interface_path.unknown.reserved); } return (p - buf); @@ -178,43 +178,43 @@ edd_show_interface(struct edd_device *ed for (i = 0; i < 8; i++) { if (isprint(info->params.interface_type[i])) { - p += snprintf(p, left, "%c", info->params.interface_type[i]); + p += scnprintf(p, left, "%c", info->params.interface_type[i]); } else { - p += snprintf(p, left, " "); + p += scnprintf(p, left, " "); } } if (!strncmp(info->params.interface_type, "ATAPI", 5)) { - p += snprintf(p, left, "\tdevice: %u lun: %u\n", + p += scnprintf(p, left, "\tdevice: %u lun: %u\n", info->params.device_path.atapi.device, info->params.device_path.atapi.lun); } else if (!strncmp(info->params.interface_type, "ATA", 3)) { - p += snprintf(p, left, "\tdevice: %u\n", + p += scnprintf(p, left, "\tdevice: %u\n", info->params.device_path.ata.device); } else if (!strncmp(info->params.interface_type, "SCSI", 4)) { - p += snprintf(p, left, "\tid: %u lun: %llu\n", + p += scnprintf(p, left, "\tid: %u lun: %llu\n", info->params.device_path.scsi.id, info->params.device_path.scsi.lun); } else if (!strncmp(info->params.interface_type, "USB", 3)) { - p += snprintf(p, left, "\tserial_number: %llx\n", + p += scnprintf(p, left, "\tserial_number: %llx\n", info->params.device_path.usb.serial_number); } else if (!strncmp(info->params.interface_type, "1394", 4)) { - p += snprintf(p, left, "\teui: %llx\n", + p += scnprintf(p, left, "\teui: %llx\n", info->params.device_path.i1394.eui); } else if (!strncmp(info->params.interface_type, "FIBRE", 5)) { - p += snprintf(p, left, "\twwid: %llx lun: %llx\n", + p += scnprintf(p, left, "\twwid: %llx lun: %llx\n", info->params.device_path.fibre.wwid, info->params.device_path.fibre.lun); } else if (!strncmp(info->params.interface_type, "I2O", 3)) { - p += snprintf(p, left, "\tidentity_tag: %llx\n", + p += scnprintf(p, left, "\tidentity_tag: %llx\n", info->params.device_path.i2o.identity_tag); } else if (!strncmp(info->params.interface_type, "RAID", 4)) { - p += snprintf(p, left, "\tidentity_tag: %x\n", + p += scnprintf(p, left, "\tidentity_tag: %x\n", info->params.device_path.raid.array_number); } else if (!strncmp(info->params.interface_type, "SATA", 4)) { - p += snprintf(p, left, "\tdevice: %u\n", + p += scnprintf(p, left, "\tdevice: %u\n", info->params.device_path.sata.device); } else { - p += snprintf(p, left, "\tunknown: %llx %llx\n", + p += scnprintf(p, left, "\tunknown: %llx %llx\n", info->params.device_path.unknown.reserved1, info->params.device_path.unknown.reserved2); } @@ -256,7 +256,7 @@ edd_show_version(struct edd_device *edev return -EINVAL; } - p += snprintf(p, left, "0x%02x\n", info->version); + p += scnprintf(p, left, "0x%02x\n", info->version); return (p - buf); } @@ -264,7 +264,7 @@ static ssize_t edd_show_disk80_sig(struct edd_device *edev, char *buf) { char *p = buf; - p += snprintf(p, left, "0x%08x\n", edd_disk80_sig); + p += scnprintf(p, left, "0x%08x\n", edd_disk80_sig); return (p - buf); } @@ -278,16 +278,16 @@ edd_show_extensions(struct edd_device *e } if (info->interface_support & EDD_EXT_FIXED_DISK_ACCESS) { - p += snprintf(p, left, "Fixed disk access\n"); + p += scnprintf(p, left, "Fixed disk access\n"); } if (info->interface_support & EDD_EXT_DEVICE_LOCKING_AND_EJECTING) { - p += snprintf(p, left, "Device locking and ejecting\n"); + p += scnprintf(p, left, "Device locking and ejecting\n"); } if (info->interface_support & EDD_EXT_ENHANCED_DISK_DRIVE_SUPPORT) { - p += snprintf(p, left, "Enhanced Disk Drive support\n"); + p += scnprintf(p, left, "Enhanced Disk Drive support\n"); } if (info->interface_support & EDD_EXT_64BIT_EXTENSIONS) { - p += snprintf(p, left, "64-bit extensions\n"); + p += scnprintf(p, left, "64-bit extensions\n"); } return (p - buf); } @@ -302,21 +302,21 @@ edd_show_info_flags(struct edd_device *e } if (info->params.info_flags & EDD_INFO_DMA_BOUNDARY_ERROR_TRANSPARENT) - p += snprintf(p, left, "DMA boundary error transparent\n"); + p += scnprintf(p, left, "DMA boundary error transparent\n"); if (info->params.info_flags & EDD_INFO_GEOMETRY_VALID) - p += snprintf(p, left, "geometry valid\n"); + p += scnprintf(p, left, "geometry valid\n"); if (info->params.info_flags & EDD_INFO_REMOVABLE) - p += snprintf(p, left, "removable\n"); + p += scnprintf(p, left, "removable\n"); if (info->params.info_flags & EDD_INFO_WRITE_VERIFY) - p += snprintf(p, left, "write verify\n"); + p += scnprintf(p, left, "write verify\n"); if (info->params.info_flags & EDD_INFO_MEDIA_CHANGE_NOTIFICATION) - p += snprintf(p, left, "media change notification\n"); + p += scnprintf(p, left, "media change notification\n"); if (info->params.info_flags & EDD_INFO_LOCKABLE) - p += snprintf(p, left, "lockable\n"); + p += scnprintf(p, left, "lockable\n"); if (info->params.info_flags & EDD_INFO_NO_MEDIA_PRESENT) - p += snprintf(p, left, "no media present\n"); + p += scnprintf(p, left, "no media present\n"); if (info->params.info_flags & EDD_INFO_USE_INT13_FN50) - p += snprintf(p, left, "use int13 fn50\n"); + p += scnprintf(p, left, "use int13 fn50\n"); return (p - buf); } @@ -329,7 +329,7 @@ edd_show_default_cylinders(struct edd_de return -EINVAL; } - p += snprintf(p, left, "0x%x\n", info->params.num_default_cylinders); + p += scnprintf(p, left, "0x%x\n", info->params.num_default_cylinders); return (p - buf); } @@ -342,7 +342,7 @@ edd_show_default_heads(struct edd_device return -EINVAL; } - p += snprintf(p, left, "0x%x\n", info->params.num_default_heads); + p += scnprintf(p, left, "0x%x\n", info->params.num_default_heads); return (p - buf); } @@ -355,7 +355,7 @@ edd_show_default_sectors_per_track(struc return -EINVAL; } - p += snprintf(p, left, "0x%x\n", info->params.sectors_per_track); + p += scnprintf(p, left, "0x%x\n", info->params.sectors_per_track); return (p - buf); } @@ -368,7 +368,7 @@ edd_show_sectors(struct edd_device *edev return -EINVAL; } - p += snprintf(p, left, "0x%llx\n", info->params.number_of_sectors); + p += scnprintf(p, left, "0x%llx\n", info->params.number_of_sectors); return (p - buf); } --- linux-2.6.3/arch/i386/kernel/entry.S 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/i386/kernel/entry.S 2004-02-17 22:26:07.000000000 -0800 @@ -43,11 +43,25 @@ #include #include #include +#include #include #include +#include #include #include #include "irq_vectors.h" + /* We do not recover from a stack overflow, but at least + * we know it happened and should be able to track it down. + */ +#ifdef CONFIG_STACK_OVERFLOW_TEST +#define STACK_OVERFLOW_TEST \ + testl $7680,%esp; \ + jnz 10f; \ + call stack_overflow; \ +10: +#else +#define STACK_OVERFLOW_TEST +#endif #define nr_syscalls ((syscall_table_size)/4) @@ -87,7 +101,102 @@ TSS_ESP0_OFFSET = (4 - 0x200) #define resume_kernel restore_all #endif -#define SAVE_ALL \ +#ifdef CONFIG_X86_HIGH_ENTRY + +#ifdef CONFIG_X86_SWITCH_PAGETABLES + +#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +/* + * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu, + * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is + * left stale, so we must check whether to repeat the real stack calculation. + */ +#define repeat_if_esp_changed \ + xorl %esp, %ebp; \ + testl $0xffffe000, %ebp; \ + jnz 0b +#else +#define repeat_if_esp_changed +#endif + +/* clobbers ebx, edx and ebp */ + +#define __SWITCH_KERNELSPACE \ + cmpl $0xff000000, %esp; \ + jb 1f; \ + \ + /* \ + * switch pagetables and load the real stack, \ + * keep the stack offset: \ + */ \ + \ + movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \ + \ + /* GET_THREAD_INFO(%ebp) intermixed */ \ +0: \ + movl %esp, %ebp; \ + movl %esp, %ebx; \ + andl $0xffffe000, %ebp; \ + andl $0x00001fff, %ebx; \ + orl TI_real_stack(%ebp), %ebx; \ + repeat_if_esp_changed; \ + \ + movl %edx, %cr3; \ + movl %ebx, %esp; \ +1: + +#endif + + +#define __SWITCH_USERSPACE \ + /* interrupted any of the user return paths? */ \ + \ + movl EIP(%esp), %eax; \ + \ + cmpl $int80_ret_start_marker, %eax; \ + jb 33f; /* nope - continue with sysexit check */\ + cmpl $int80_ret_end_marker, %eax; \ + jb 22f; /* yes - switch to virtual stack */ \ +33: \ + cmpl $sysexit_ret_start_marker, %eax; \ + jb 44f; /* nope - continue with user check */ \ + cmpl $sysexit_ret_end_marker, %eax; \ + jb 22f; /* yes - switch to virtual stack */ \ + /* return to userspace? */ \ +44: \ + movl EFLAGS(%esp),%ecx; \ + movb CS(%esp),%cl; \ + testl $(VM_MASK | 3),%ecx; \ + jz 2f; \ +22: \ + /* \ + * switch to the virtual stack, then switch to \ + * the userspace pagetables. \ + */ \ + \ + GET_THREAD_INFO(%ebp); \ + movl TI_virtual_stack(%ebp), %edx; \ + movl TI_user_pgd(%ebp), %ecx; \ + \ + movl %esp, %ebx; \ + andl $0x1fff, %ebx; \ + orl %ebx, %edx; \ +int80_ret_start_marker: \ + movl %edx, %esp; \ + movl %ecx, %cr3; \ + \ + __RESTORE_ALL; \ +int80_ret_end_marker: \ +2: + +#else /* !CONFIG_X86_HIGH_ENTRY */ + +#define __SWITCH_KERNELSPACE +#define __SWITCH_USERSPACE + +#endif + +#define __SAVE_ALL \ cld; \ pushl %es; \ pushl %ds; \ @@ -102,7 +211,7 @@ TSS_ESP0_OFFSET = (4 - 0x200) movl %edx, %ds; \ movl %edx, %es; -#define RESTORE_INT_REGS \ +#define __RESTORE_INT_REGS \ popl %ebx; \ popl %ecx; \ popl %edx; \ @@ -111,29 +220,28 @@ TSS_ESP0_OFFSET = (4 - 0x200) popl %ebp; \ popl %eax -#define RESTORE_REGS \ - RESTORE_INT_REGS; \ -1: popl %ds; \ -2: popl %es; \ +#define __RESTORE_REGS \ + __RESTORE_INT_REGS; \ +111: popl %ds; \ +222: popl %es; \ .section .fixup,"ax"; \ -3: movl $0,(%esp); \ - jmp 1b; \ -4: movl $0,(%esp); \ - jmp 2b; \ +444: movl $0,(%esp); \ + jmp 111b; \ +555: movl $0,(%esp); \ + jmp 222b; \ .previous; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,3b; \ - .long 2b,4b; \ + .long 111b,444b;\ + .long 222b,555b;\ .previous - -#define RESTORE_ALL \ - RESTORE_REGS \ +#define __RESTORE_ALL \ + __RESTORE_REGS \ addl $4, %esp; \ -1: iret; \ +333: iret; \ .section .fixup,"ax"; \ -2: sti; \ +666: sti; \ movl $(__USER_DS), %edx; \ movl %edx, %ds; \ movl %edx, %es; \ @@ -142,10 +250,19 @@ TSS_ESP0_OFFSET = (4 - 0x200) .previous; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,2b; \ + .long 333b,666b;\ .previous +#define SAVE_ALL \ + __SAVE_ALL; \ + __SWITCH_KERNELSPACE; \ + STACK_OVERFLOW_TEST; + +#define RESTORE_ALL \ + __SWITCH_USERSPACE; \ + __RESTORE_ALL; +.section .entry.text,"ax" ENTRY(lcall7) pushfl # We get a different stack layout with call @@ -163,7 +280,7 @@ do_lcall: movl %edx,EIP(%ebp) # Now we move them to their "normal" places movl %ecx,CS(%ebp) # andl $-8192, %ebp # GET_THREAD_INFO - movl TI_EXEC_DOMAIN(%ebp), %edx # Get the execution domain + movl TI_exec_domain(%ebp), %edx # Get the execution domain call *4(%edx) # Call the lcall7 handler for the domain addl $4, %esp popl %eax @@ -208,7 +325,7 @@ ENTRY(resume_userspace) cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done on # int/exception return? jne work_pending @@ -216,18 +333,18 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - cmpl $0,TI_PRE_COUNT(%ebp) # non-zero preempt_count ? + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_all need_resched: - movl TI_FLAGS(%ebp), %ecx # need_resched set ? + movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl jz restore_all testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all - movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebp) + movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp) sti call schedule - movl $0,TI_PRE_COUNT(%ebp) + movl $0,TI_preempt_count(%ebp) cli jmp need_resched #endif @@ -246,37 +363,50 @@ sysenter_past_esp: pushl $(__USER_CS) pushl $SYSENTER_RETURN -/* - * Load the potential sixth argument from user stack. - * Careful about security. - */ - cmpl $__PAGE_OFFSET-3,%ebp - jae syscall_fault -1: movl (%ebp),%ebp -.section __ex_table,"a" - .align 4 - .long 1b,syscall_fault -.previous - pushl %eax SAVE_ALL GET_THREAD_INFO(%ebp) cmpl $(nr_syscalls), %eax jae syscall_badsys - testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) + testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) jnz syscall_trace_entry call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) cli - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work + +#ifdef CONFIG_X86_SWITCH_PAGETABLES + + GET_THREAD_INFO(%ebp) + movl TI_virtual_stack(%ebp), %edx + movl TI_user_pgd(%ebp), %ecx + movl %esp, %ebx + andl $0x1fff, %ebx + orl %ebx, %edx +sysexit_ret_start_marker: + movl %edx, %esp + movl %ecx, %cr3 +#endif + /* + * only ebx is not restored by the userspace sysenter vsyscall + * code, it assumes it to be callee-saved. + */ + movl EBX(%esp), %ebx + /* if something modifies registers it must also disable sysexit */ + movl EIP(%esp), %edx movl OLDESP(%esp), %ecx + sti sysexit +#ifdef CONFIG_X86_SWITCH_PAGETABLES +sysexit_ret_end_marker: + nop +#endif # system call handler stub @@ -287,7 +417,7 @@ ENTRY(system_call) cmpl $(nr_syscalls), %eax jae syscall_badsys # system call tracing in operation - testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) + testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) jnz syscall_trace_entry syscall_call: call *sys_call_table(,%eax,4) @@ -296,10 +426,23 @@ syscall_exit: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work restore_all: +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS + movl EFLAGS(%esp), %eax # mix EFLAGS and CS + movb CS(%esp), %al + testl $(VM_MASK | 3), %eax + jz resume_kernelX # returning to kernel or vm86-space + + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? + jz resume_kernelX + + int $3 + +resume_kernelX: +#endif RESTORE_ALL # perform work that needs to be done immediately before resumption @@ -312,7 +455,7 @@ work_resched: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? jz restore_all @@ -327,6 +470,22 @@ work_notifysig: # deal with pending s # vm86-space xorl %edx, %edx call do_notify_resume + +#if CONFIG_X86_HIGH_ENTRY + /* + * Reload db7 if necessary: + */ + movl TI_flags(%ebp), %ecx + testb $_TIF_DB7, %cl + jnz work_db7 + + jmp restore_all + +work_db7: + movl TI_task(%ebp), %edx; + movl task_thread_db7(%edx), %edx; + movl %edx, %db7; +#endif jmp restore_all ALIGN @@ -382,7 +541,7 @@ syscall_badsys: */ .data ENTRY(interrupt) -.text +.previous vector=0 ENTRY(irq_entries_start) @@ -392,7 +551,7 @@ ENTRY(irq_entries_start) jmp common_interrupt .data .long 1b -.text +.previous vector=vector+1 .endr @@ -433,12 +592,17 @@ error_code: movl ES(%esp), %edi # get the function address movl %eax, ORIG_EAX(%esp) movl %ecx, ES(%esp) - movl %esp, %edx pushl %esi # push the error code - pushl %edx # push the pt_regs pointer movl $(__USER_DS), %edx movl %edx, %ds movl %edx, %es + +/* clobbers edx, ebx and ebp */ + __SWITCH_KERNELSPACE + + leal 4(%esp), %edx # prepare pt_regs + pushl %edx # push pt_regs + call *%edi addl $8, %esp jmp ret_from_exception @@ -515,8 +679,8 @@ ENTRY(nmi) /* Do not access memory above the end of our stack page, * it might not exist. */ - andl $0x1fff,%eax - cmpl $0x1fec,%eax + andl $(THREAD_SIZE-1),%eax + cmpl $(THREAD_SIZE-20),%eax popl %eax jae nmi_stack_correct cmpl $sysenter_entry,12(%esp) @@ -529,7 +693,7 @@ nmi_stack_correct: pushl %edx call do_nmi addl $8, %esp - RESTORE_ALL + jmp restore_all nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) @@ -606,6 +770,8 @@ ENTRY(spurious_interrupt_bug) pushl $do_spurious_interrupt_bug jmp error_code +.previous + .data ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/entry_trampoline.c 2004-02-17 22:26:09.000000000 -0800 @@ -0,0 +1,73 @@ +/* + * linux/arch/i386/kernel/entry_trampoline.c + * + * (C) Copyright 2003 Ingo Molnar + * + * This file contains the needed support code for 4GB userspace + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text; + +void __init init_entry_mappings(void) +{ +#ifdef CONFIG_X86_HIGH_ENTRY + void *tramp; + + /* + * We need a high IDT and GDT for the 4G/4G split: + */ + trap_init_virtual_IDT(); + + __set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL); + __set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL); + tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0); + + printk("mapped 4G/4G trampoline to %p.\n", tramp); + BUG_ON((void *)&__start___entry_text != tramp); + /* + * Virtual kernel stack: + */ + BUG_ON(__kmap_atomic_vaddr(KM_VSTACK0) & 8191); + BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE); + BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE); + + /* + * set up the initial thread's virtual stack related + * fields: + */ + current->thread.stack_page0 = virt_to_page((char *)current->thread_info); + current->thread.stack_page1 = virt_to_page((char *)current->thread_info + PAGE_SIZE); + current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK0); + + __kunmap_atomic_type(KM_VSTACK0); + __kunmap_atomic_type(KM_VSTACK1); + __kmap_atomic(current->thread.stack_page0, KM_VSTACK0); + __kmap_atomic(current->thread.stack_page1, KM_VSTACK1); + +#endif + current->thread_info->real_stack = (void *)current->thread_info; + current->thread_info->user_pgd = NULL; + current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE; +} + + + +void __init entry_trampoline_setup(void) +{ + /* + * old IRQ entries set up by the boot code will still hang + * around - they are a sign of hw trouble anyway, now they'll + * produce a double fault message. + */ + trap_init_virtual_GDT(); +} --- linux-2.6.3/arch/i386/kernel/head.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/head.S 2004-02-17 22:26:07.000000000 -0800 @@ -16,6 +16,8 @@ #include #include #include +#include +#include #define OLD_CL_MAGIC_ADDR 0x90020 #define OLD_CL_MAGIC 0xA33F @@ -325,12 +327,12 @@ rp_sidt: ret ENTRY(stack_start) - .long init_thread_union+8192 + .long init_thread_union+THREAD_SIZE .long __BOOT_DS /* This is the default interrupt "handler" :-) */ int_msg: - .asciz "Unknown interrupt\n" + .asciz "Unknown interrupt or fault at EIP %p %p %p\n" ALIGN ignore_int: cld @@ -342,9 +344,17 @@ ignore_int: movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es + pushl 16(%esp) + pushl 24(%esp) + pushl 32(%esp) + pushl 40(%esp) pushl $int_msg call printk popl %eax + popl %eax + popl %eax + popl %eax + popl %eax popl %ds popl %es popl %edx @@ -377,23 +387,27 @@ cpu_gdt_descr: .fill NR_CPUS-1,8,0 # space for the other GDT descriptors /* - * This is initialized to create an identity-mapping at 0-8M (for bootup - * purposes) and another mapping of the 0-8M area at virtual address + * This is initialized to create an identity-mapping at 0-16M (for bootup + * purposes) and another mapping of the 0-16M area at virtual address * PAGE_OFFSET. */ .org 0x1000 ENTRY(swapper_pg_dir) .long 0x00102007 .long 0x00103007 - .fill BOOT_USER_PGD_PTRS-2,4,0 - /* default: 766 entries */ + .long 0x00104007 + .long 0x00105007 + .fill BOOT_USER_PGD_PTRS-4,4,0 + /* default: 764 entries */ .long 0x00102007 .long 0x00103007 - /* default: 254 entries */ - .fill BOOT_KERNEL_PGD_PTRS-2,4,0 + .long 0x00104007 + .long 0x00105007 + /* default: 252 entries */ + .fill BOOT_KERNEL_PGD_PTRS-4,4,0 /* - * The page tables are initialized to only 8MB here - the final page + * The page tables are initialized to only 16MB here - the final page * tables are set up later depending on memory size. */ .org 0x2000 @@ -402,15 +416,21 @@ ENTRY(pg0) .org 0x3000 ENTRY(pg1) +.org 0x4000 +ENTRY(pg2) + +.org 0x5000 +ENTRY(pg3) + /* * empty_zero_page must immediately follow the page tables ! (The * initialization loop counts until empty_zero_page) */ -.org 0x4000 +.org 0x6000 ENTRY(empty_zero_page) -.org 0x5000 +.org 0x7000 /* * Real beginning of normal "text" segment @@ -419,12 +439,12 @@ ENTRY(stext) ENTRY(_stext) /* - * This starts the data section. Note that the above is all - * in the text section because it has alignment requirements - * that we cannot fulfill any other way. + * This starts the data section. */ .data +.align PAGE_SIZE_asm + /* * The Global Descriptor Table contains 28 quadwords, per-CPU. */ @@ -439,7 +459,9 @@ ENTRY(boot_gdt_table) .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ #endif - .align L1_CACHE_BYTES + +.align PAGE_SIZE_asm + ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ --- linux-2.6.3/arch/i386/kernel/i386_ksyms.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/kernel/i386_ksyms.c 2004-02-17 22:26:07.000000000 -0800 @@ -97,7 +97,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter EXPORT_SYMBOL_NOVERS(__down_failed_trylock); EXPORT_SYMBOL_NOVERS(__up_wakeup); /* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy_generic); /* Delay loops */ EXPORT_SYMBOL(__ndelay); EXPORT_SYMBOL(__udelay); @@ -111,13 +110,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); +#if !defined(CONFIG_X86_UACCESS_INDIRECT) EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__direct_strncpy_from_user); EXPORT_SYMBOL(clear_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_from_user_ll); EXPORT_SYMBOL(__copy_to_user_ll); EXPORT_SYMBOL(strnlen_user); +#else /* CONFIG_X86_UACCESS_INDIRECT */ +EXPORT_SYMBOL(direct_csum_partial_copy_generic); +#endif EXPORT_SYMBOL(dma_alloc_coherent); EXPORT_SYMBOL(dma_free_coherent); --- linux-2.6.3/arch/i386/kernel/i387.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/kernel/i387.c 2004-02-17 22:26:07.000000000 -0800 @@ -218,6 +218,7 @@ void set_fpu_mxcsr( struct task_struct * static int convert_fxsr_to_user( struct _fpstate __user *buf, struct i387_fxsave_struct *fxsave ) { + struct _fpreg tmp[8]; /* 80 bytes scratch area */ unsigned long env[7]; struct _fpreg __user *to; struct _fpxreg *from; @@ -234,23 +235,25 @@ static int convert_fxsr_to_user( struct if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) return 1; - to = &buf->_st[0]; + to = tmp; from = (struct _fpxreg *) &fxsave->st_space[0]; for ( i = 0 ; i < 8 ; i++, to++, from++ ) { unsigned long *t = (unsigned long *)to; unsigned long *f = (unsigned long *)from; - if (__put_user(*f, t) || - __put_user(*(f + 1), t + 1) || - __put_user(from->exponent, &to->exponent)) - return 1; + *t = *f; + *(t + 1) = *(f+1); + to->exponent = from->exponent; } + if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8]))) + return 1; return 0; } static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, struct _fpstate __user *buf ) { + struct _fpreg tmp[8]; /* 80 bytes scratch area */ unsigned long env[7]; struct _fpxreg *to; struct _fpreg __user *from; @@ -258,6 +261,8 @@ static int convert_fxsr_from_user( struc if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) return 1; + if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8]))) + return 1; fxsave->cwd = (unsigned short)(env[0] & 0xffff); fxsave->swd = (unsigned short)(env[1] & 0xffff); @@ -269,15 +274,14 @@ static int convert_fxsr_from_user( struc fxsave->fos = env[6]; to = (struct _fpxreg *) &fxsave->st_space[0]; - from = &buf->_st[0]; + from = tmp; for ( i = 0 ; i < 8 ; i++, to++, from++ ) { unsigned long *t = (unsigned long *)to; unsigned long *f = (unsigned long *)from; - if (__get_user(*t, f) || - __get_user(*(t + 1), f + 1) || - __get_user(to->exponent, &from->exponent)) - return 1; + *t = *f; + *(t + 1) = *(f + 1); + to->exponent = from->exponent; } return 0; } --- linux-2.6.3/arch/i386/kernel/init_task.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/init_task.c 2004-02-17 22:26:07.000000000 -0800 @@ -26,7 +26,7 @@ EXPORT_SYMBOL(init_mm); */ union thread_union init_thread_union __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; + { INIT_THREAD_INFO(init_task, init_thread_union) }; /* * Initial task structure. @@ -44,5 +44,5 @@ EXPORT_SYMBOL(init_task); * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; +struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS }; --- linux-2.6.3/arch/i386/kernel/io_apic.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/kernel/io_apic.c 2004-02-17 22:26:00.000000000 -0800 @@ -280,7 +280,7 @@ static void set_ioapic_affinity_irq(unsi spin_unlock_irqrestore(&ioapic_lock, flags); } -#if defined(CONFIG_SMP) +#if defined(CONFIG_IRQBALANCE) # include /* kernel_thread() */ # include /* kstat */ # include /* kmalloc() */ @@ -317,8 +317,7 @@ struct irq_cpu_info { #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) -#define CPU_TO_PACKAGEINDEX(i) \ - ((physical_balance && i > cpu_sibling_map[i]) ? cpu_sibling_map[i] : i) +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) @@ -401,6 +400,7 @@ static void do_irq_balance(void) unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); unsigned long move_this_load = 0; int max_loaded = 0, min_loaded = 0; + int load; unsigned long useful_load_threshold = balanced_irq_interval + 10; int selected_irq; int tmp_loaded, first_attempt = 1; @@ -452,7 +452,7 @@ static void do_irq_balance(void) for (i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) continue; - if (physical_balance && i > cpu_sibling_map[i]) + if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (min_cpu_irq > CPU_IRQ(i)) { min_cpu_irq = CPU_IRQ(i); @@ -471,7 +471,7 @@ tryanothercpu: for (i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) continue; - if (physical_balance && i > cpu_sibling_map[i]) + if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (max_cpu_irq <= CPU_IRQ(i)) continue; @@ -551,9 +551,14 @@ tryanotherirq: * We seek the least loaded sibling by making the comparison * (A+B)/2 vs B */ - if (physical_balance && (CPU_IRQ(min_loaded) >> 1) > - CPU_IRQ(cpu_sibling_map[min_loaded])) - min_loaded = cpu_sibling_map[min_loaded]; + load = CPU_IRQ(min_loaded) >> 1; + for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { + if (load > CPU_IRQ(j)) { + /* This won't change cpu_sibling_map[min_loaded] */ + load = CPU_IRQ(j); + min_loaded = j; + } + } cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]); target_cpu_mask = cpumask_of_cpu(min_loaded); @@ -689,9 +694,11 @@ static inline void move_irq(int irq) __initcall(balanced_irq_init); -#else /* !SMP */ +#else /* !CONFIG_IRQBALANCE */ static inline void move_irq(int irq) { } +#endif /* CONFIG_IRQBALANCE */ +#ifndef CONFIG_SMP void send_IPI_self(int vector) { unsigned int cfg; @@ -706,7 +713,7 @@ void send_IPI_self(int vector) */ apic_write_around(APIC_ICR, cfg); } -#endif /* defined(CONFIG_SMP) */ +#endif /* !CONFIG_SMP */ /* @@ -2150,6 +2157,10 @@ static inline void check_timer(void) { int pin1, pin2; int vector; + unsigned int ver; + + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); /* * get/set the timer IRQ vector: @@ -2163,11 +2174,17 @@ static inline void check_timer(void) * mode for the 8259A whenever interrupts are routed * through I/O APICs. Also IRQ0 has to be enabled in * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. + * disabled in the local APIC. Finally timer interrupts + * need to be acknowledged manually in the 8259A for + * do_slow_timeoffset() and for the i82489DX when using + * the NMI watchdog. */ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - timer_ack = 1; + if (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)) + timer_ack = 1; + else + timer_ack = !cpu_has_tsc; enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); @@ -2185,7 +2202,8 @@ static inline void check_timer(void) disable_8259A_irq(0); setup_nmi(); enable_8259A_irq(0); - check_nmi_watchdog(); + if (check_nmi_watchdog() < 0); + timer_ack = !cpu_has_tsc; } return; } @@ -2208,7 +2226,8 @@ static inline void check_timer(void) add_pin_to_irq(0, 0, pin2); if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); - check_nmi_watchdog(); + if (check_nmi_watchdog() < 0); + timer_ack = !cpu_has_tsc; } return; } --- linux-2.6.3/arch/i386/kernel/irq.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/irq.c 2004-02-17 22:25:46.000000000 -0800 @@ -435,7 +435,7 @@ asmlinkage unsigned int do_IRQ(struct pt long esp; __asm__ __volatile__("andl %%esp,%0" : - "=r" (esp) : "0" (8191)); + "=r" (esp) : "0" (THREAD_SIZE - 1)); if (unlikely(esp < (sizeof(struct thread_info) + 1024))) { printk("do_IRQ: stack overflow: %ld\n", esp - sizeof(struct thread_info)); @@ -508,6 +508,8 @@ out: irq_exit(); + kgdb_process_breakpoint(); + return 1; } @@ -927,7 +929,7 @@ cpumask_t irq_affinity[NR_IRQS] = { [0 . static int irq_affinity_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -968,7 +970,7 @@ static int irq_affinity_write_proc(struc static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/kgdb_stub.c 2004-02-17 22:08:45.000000000 -0800 @@ -0,0 +1,2457 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (c) 2000 VERITAS Software Corporation. + * + */ +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * Updated by: David Grothe + * Updated by: Robert Walsh + * Updated by: wangdi + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Compatibility with 2.1.xx kernel by David Grothe + * + * Changes to allow auto initilization. All that is needed is that it + * be linked with the kernel and a break point (int 3) be executed. + * The header file defines BREAKPOINT to allow one to do + * this. It should also be possible, once the interrupt system is up, to + * call putDebugChar("+"). Once this is done, the remote debugger should + * get our attention by sending a ^C in a packet. George Anzinger + * + * Integrated into 2.2.5 kernel by Tigran Aivazian + * Added thread support, support for multiple processors, + * support for ia-32(x86) hardware debugging. + * Amit S. Kale ( akale@veritas.com ) + * + * Modified to support debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing an int 3. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? SNN (signal NN) + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $#. + * + * where + * :: + * :: < two hex digits computed as modulo 256 sum of > + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer. + * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ +#define KGDB_VERSION "<20030915.1651.33>" +#include +#include +#include /* for strcpy */ +#include +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include +#include +#include + +/************************************************************************ + * + * external low-level support routines + */ +typedef void (*Function) (void); /* pointer to a function */ + +/* Thread reference */ +typedef unsigned char threadref[8]; + +extern int tty_putDebugChar(int); /* write a single character */ +extern int tty_getDebugChar(void); /* read and return a single char */ +extern void tty_flushDebugChar(void); /* flush pending characters */ +extern int eth_putDebugChar(int); /* write a single character */ +extern int eth_getDebugChar(void); /* read and return a single char */ +extern void eth_flushDebugChar(void); /* flush pending characters */ + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 400 + +char *kgdb_version = KGDB_VERSION; + +/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ +int debug_regs = 0; /* set to non-zero to print registers */ + +/* filled in by an external module */ +char *gdb_module_offsets; + +static const char hexchars[] = "0123456789abcdef"; + +/* Number of bytes of registers. */ +#define NUMREGBYTES 64 +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + */ +enum regnames { _EAX, /* 0 */ + _ECX, /* 1 */ + _EDX, /* 2 */ + _EBX, /* 3 */ + _ESP, /* 4 */ + _EBP, /* 5 */ + _ESI, /* 6 */ + _EDI, /* 7 */ + _PC /* 8 also known as eip */ , + _PS /* 9 also known as eflags */ , + _CS, /* 10 */ + _SS, /* 11 */ + _DS, /* 12 */ + _ES, /* 13 */ + _FS, /* 14 */ + _GS /* 15 */ +}; + +/*************************** ASSEMBLY CODE MACROS *************************/ +/* + * Put the error code here just in case the user cares. + * Likewise, the vector number here (since GDB only gets the signal + * number through the usual means, and that's not very specific). + * The called_from is the return address so he can tell how we entered kgdb. + * This will allow him to seperate out the various possible entries. + */ +#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ + +#define PID_MAX PID_MAX_DEFAULT + +#ifdef CONFIG_SMP +void smp_send_nmi_allbutself(void); +#define IF_SMP(x) x +#undef MAX_NO_CPUS +#ifndef CONFIG_NO_KGDB_CPUS +#define CONFIG_NO_KGDB_CPUS 2 +#endif +#if CONFIG_NO_KGDB_CPUS > NR_CPUS +#define MAX_NO_CPUS NR_CPUS +#else +#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS +#endif +#define hold_init hold_on_sstep: 1, +#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) +#define NUM_CPUS num_online_cpus() +#else +#define IF_SMP(x) +#define hold_init +#undef MAX_NO_CPUS +#define MAX_NO_CPUS 1 +#define NUM_CPUS 1 +#endif +#define NOCPU (struct task_struct *)0xbad1fbad +/* *INDENT-OFF* */ +struct kgdb_info { + int used_malloc; + void *called_from; + long long entry_tsc; + int errcode; + int vector; + int print_debug_info; +#ifdef CONFIG_SMP + int hold_on_sstep; + struct { + volatile struct task_struct *task; + int pid; + int hold; + struct pt_regs *regs; + } cpus_waiting[MAX_NO_CPUS]; +#endif +} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; + +/* *INDENT-ON* */ + +#define used_m kgdb_info.used_malloc +/* + * This is little area we set aside to contain the stack we + * need to build to allow gdb to call functions. We use one + * per cpu to avoid locking issues. We will do all this work + * with interrupts off so that should take care of the protection + * issues. + */ +#define LOOKASIDE_SIZE 200 /* should be more than enough */ +#define MALLOC_MAX 200 /* Max malloc size */ +struct { + unsigned int esp; + int array[LOOKASIDE_SIZE]; +} fn_call_lookaside[MAX_NO_CPUS]; + +static int trap_cpu; +static unsigned int OLD_esp; + +#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] +#define IF_BIT 0x200 +#define TF_BIT 0x100 + +#define MALLOC_ROUND 8-1 + +static char malloc_array[MALLOC_MAX]; +IF_SMP(static void to_gdb(const char *mess)); +void * +malloc(int size) +{ + + if (size <= (MALLOC_MAX - used_m)) { + int old_used = used_m; + used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); + return &malloc_array[old_used]; + } else { + return NULL; + } +} + +/* + * I/O dispatch functions... + * Based upon kgdboe, either call the ethernet + * handler or the serial one.. + */ +void +putDebugChar(int c) +{ + if (!kgdboe) { + tty_putDebugChar(c); + } else { + eth_putDebugChar(c); + } +} + +int +getDebugChar(void) +{ + if (!kgdboe) { + return tty_getDebugChar(); + } else { + return eth_getDebugChar(); + } +} + +void +flushDebugChar(void) +{ + if (!kgdboe) { + tty_flushDebugChar(); + } else { + eth_flushDebugChar(); + } +} + +/* + * Gdb calls functions by pushing agruments, including a return address + * on the stack and the adjusting EIP to point to the function. The + * whole assumption in GDB is that we are on a different stack than the + * one the "user" i.e. code that hit the break point, is on. This, of + * course is not true in the kernel. Thus various dodges are needed to + * do the call without directly messing with EIP (which we can not change + * as it is just a location and not a register. To adjust it would then + * require that we move every thing below EIP up or down as needed. This + * will not work as we may well have stack relative pointer on the stack + * (such as the pointer to regs, for example). + + * So here is what we do: + * We detect gdb attempting to store into the stack area and instead, store + * into the fn_call_lookaside.array at the same relative location as if it + * were the area ESP pointed at. We also trap ESP modifications + * and uses these to adjust fn_call_lookaside.esp. On entry + * fn_call_lookaside.esp will be set to point at the last entry in + * fn_call_lookaside.array. This allows us to check if it has changed, and + * if so, on exit, we add the registers we will use to do the move and a + * trap/ interrupt return exit sequence. We then adjust the eflags in the + * regs array (remember we now have a copy in the fn_call_lookaside.array) to + * kill the interrupt bit, AND we change EIP to point at our set up stub. + * As part of the register set up we preset the registers to point at the + * begining and end of the fn_call_lookaside.array, so all the stub needs to + * do is move words from the array to the stack until ESP= the desired value + * then do the rti. This will then transfer to the desired function with + * all the correct registers. Nifty huh? + */ +extern asmlinkage void fn_call_stub(void); +extern asmlinkage void fn_rtn_stub(void); +/* *INDENT-OFF* */ +__asm__("fn_rtn_stub:\n\t" + "movl %eax,%esp\n\t" + "fn_call_stub:\n\t" + "1:\n\t" + "addl $-4,%ebx\n\t" + "movl (%ebx), %eax\n\t" + "pushl %eax\n\t" + "cmpl %esp,%ecx\n\t" + "jne 1b\n\t" + "popl %eax\n\t" + "popl %ebx\n\t" + "popl %ecx\n\t" + "iret \n\t"); +/* *INDENT-ON* */ +#define gdb_i386vector kgdb_info.vector +#define gdb_i386errcode kgdb_info.errcode +#define waiting_cpus kgdb_info.cpus_waiting +#define remote_debug kgdb_info.print_debug_info +#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold +/* gdb locks */ + +#ifdef CONFIG_SMP +static int in_kgdb_called; +static spinlock_t waitlocks[MAX_NO_CPUS] = + {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; +/* + * The following array has the thread pointer of each of the "other" + * cpus. We make it global so it can be seen by gdb. + */ +volatile int in_kgdb_entry_log[MAX_NO_CPUS]; +volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; +/* +static spinlock_t continuelocks[MAX_NO_CPUS]; +*/ +spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; +/* waiters on our spinlock plus us */ +static atomic_t spinlock_waiters = ATOMIC_INIT(1); +static int spinlock_count = 0; +static int spinlock_cpu = 0; +/* + * Note we use nested spin locks to account for the case where a break + * point is encountered when calling a function by user direction from + * kgdb. Also there is the memory exception recursion to account for. + * Well, yes, but this lets other cpus thru too. Lets add a + * cpu id to the lock. + */ +#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ + spinlock_cpu != smp_processor_id()){\ + atomic_inc(&spinlock_waiters); \ + while (! spin_trylock(x)) {\ + in_kgdb(®s);\ + }\ + atomic_dec(&spinlock_waiters); \ + spinlock_count = 1; \ + spinlock_cpu = smp_processor_id(); \ + }else{ \ + spinlock_count++; \ + } +#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) +#else +unsigned kgdb_spinlock = 0; +#define KGDB_SPIN_LOCK(x) --*x +#define KGDB_SPIN_UNLOCK(x) ++*x +#endif + +int +hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return (ch - 'a' + 10); + if ((ch >= '0') && (ch <= '9')) + return (ch - '0'); + if ((ch >= 'A') && (ch <= 'F')) + return (ch - 'A' + 10); + return (-1); +} + +/* scan for the sequence $# */ +void +getpacket(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + char ch; + + do { + /* wait around for the start character, ignore all other characters */ + while ((ch = (getDebugChar() & 0x7f)) != '$') ; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < BUFMAX) { + ch = getDebugChar() & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(getDebugChar() & 0x7f) << 4; + xmitcsum += hex(getDebugChar() & 0x7f); + if ((remote_debug) && (checksum != xmitcsum)) { + printk + ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", + checksum, xmitcsum, buffer); + } + + if (checksum != xmitcsum) + putDebugChar('-'); /* failed checksum */ + else { + putDebugChar('+'); /* successful transfer */ + /* if a sequence char is present, reply the sequence ID */ + if (buffer[2] == ':') { + putDebugChar(buffer[0]); + putDebugChar(buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(buffer); + for (i = 3; i <= count; i++) + buffer[i - 3] = buffer[i]; + } + } + } + } while (checksum != xmitcsum); + + if (remote_debug) + printk("R:%s\n", buffer); + flushDebugChar(); +} + +/* send the packet in buffer. */ + +void +putpacket(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + /* $#. */ + + if (!kgdboe) { + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + putDebugChar(ch); + checksum += ch; + count += 1; + } + + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + + } while ((getDebugChar() & 0x7f) != '+'); + } else { + /* + * For udp, we can not transfer too much bytes once. + * We only transfer MAX_SEND_COUNT size bytes each time + */ + +#define MAX_SEND_COUNT 30 + + int send_count = 0, i = 0; + char send_buf[MAX_SEND_COUNT]; + + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + send_count = 0; + while ((ch = buffer[count])) { + if (send_count >= MAX_SEND_COUNT) { + for(i = 0; i < MAX_SEND_COUNT; i++) { + putDebugChar(send_buf[i]); + } + flushDebugChar(); + send_count = 0; + } else { + send_buf[send_count] = ch; + checksum += ch; + count ++; + send_count++; + } + } + for(i = 0; i < send_count; i++) + putDebugChar(send_buf[i]); + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + } while ((getDebugChar() & 0x7f) != '+'); + } +} + +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; +static short error; + +void +debug_error(char *format, char *parm) +{ + if (remote_debug) + printk(format, parm); +} + +static void +print_regs(struct pt_regs *regs) +{ + printk("EAX=%08lx ", regs->eax); + printk("EBX=%08lx ", regs->ebx); + printk("ECX=%08lx ", regs->ecx); + printk("EDX=%08lx ", regs->edx); + printk("\n"); + printk("ESI=%08lx ", regs->esi); + printk("EDI=%08lx ", regs->edi); + printk("EBP=%08lx ", regs->ebp); + printk("ESP=%08lx ", (long) ®s->esp); + printk("\n"); + printk(" DS=%08x ", regs->xds); + printk(" ES=%08x ", regs->xes); + printk(" SS=%08x ", __KERNEL_DS); + printk(" FL=%08lx ", regs->eflags); + printk("\n"); + printk(" CS=%08x ", regs->xcs); + printk(" IP=%08lx ", regs->eip); +#if 0 + printk(" FS=%08x ", regs->fs); + printk(" GS=%08x ", regs->gs); +#endif + printk("\n"); + +} /* print_regs */ + +#define NEW_esp fn_call_lookaside[trap_cpu].esp + +static void +regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_EAX] = regs->eax; + gdb_regs[_EBX] = regs->ebx; + gdb_regs[_ECX] = regs->ecx; + gdb_regs[_EDX] = regs->edx; + gdb_regs[_ESI] = regs->esi; + gdb_regs[_EDI] = regs->edi; + gdb_regs[_EBP] = regs->ebp; + gdb_regs[_DS] = regs->xds; + gdb_regs[_ES] = regs->xes; + gdb_regs[_PS] = regs->eflags; + gdb_regs[_CS] = regs->xcs; + gdb_regs[_PC] = regs->eip; + /* Note, as we are a debugging the kernel, we will always + * trap in kernel code, this means no priviledge change, + * and so the pt_regs structure is not completely valid. In a non + * privilege change trap, only EFLAGS, CS and EIP are put on the stack, + * SS and ESP are not stacked, this means that the last 2 elements of + * pt_regs is not valid (they would normally refer to the user stack) + * also, using regs+1 is no good because you end up will a value that is + * 2 longs (8) too high. This used to cause stepping over functions + * to fail, so my fix is to use the address of regs->esp, which + * should point at the end of the stack frame. Note I have ignored + * completely exceptions that cause an error code to be stacked, such + * as double fault. Stuart Hughes, Zentropix. + * original code: gdb_regs[_ESP] = (int) (regs + 1) ; + + * this is now done on entry and moved to OLD_esp (as well as NEW_esp). + */ + gdb_regs[_ESP] = NEW_esp; + gdb_regs[_SS] = __KERNEL_DS; + gdb_regs[_FS] = 0xFFFF; + gdb_regs[_GS] = 0xFFFF; +} /* regs_to_gdb_regs */ + +static void +gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) +{ + regs->eax = gdb_regs[_EAX]; + regs->ebx = gdb_regs[_EBX]; + regs->ecx = gdb_regs[_ECX]; + regs->edx = gdb_regs[_EDX]; + regs->esi = gdb_regs[_ESI]; + regs->edi = gdb_regs[_EDI]; + regs->ebp = gdb_regs[_EBP]; + regs->xds = gdb_regs[_DS]; + regs->xes = gdb_regs[_ES]; + regs->eflags = gdb_regs[_PS]; + regs->xcs = gdb_regs[_CS]; + regs->eip = gdb_regs[_PC]; + NEW_esp = gdb_regs[_ESP]; /* keep the value */ +#if 0 /* can't change these */ + regs->esp = gdb_regs[_ESP]; + regs->xss = gdb_regs[_SS]; + regs->fs = gdb_regs[_FS]; + regs->gs = gdb_regs[_GS]; +#endif + +} /* gdb_regs_to_regs */ +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched ((unsigned long) scheduling_functions_start_here) +#define last_sched ((unsigned long) scheduling_functions_end_here) + +int thread_list = 0; + +void +get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) +{ + unsigned long stack_page; + int count = 0; + IF_SMP(int i); + if (!p || p == current) { + regs_to_gdb_regs(gdb_regs, regs); + return; + } +#ifdef CONFIG_SMP + for (i = 0; i < MAX_NO_CPUS; i++) { + if (p == kgdb_info.cpus_waiting[i].task) { + regs_to_gdb_regs(gdb_regs, + kgdb_info.cpus_waiting[i].regs); + gdb_regs[_ESP] = + (int) &kgdb_info.cpus_waiting[i].regs->esp; + + return; + } + } +#endif + memset(gdb_regs, 0, NUMREGBYTES); + gdb_regs[_ESP] = p->thread.esp; + gdb_regs[_PC] = p->thread.eip; + gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; + gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); + gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); + +/* + * This code is to give a more informative notion of where a process + * is waiting. It is used only when the user asks for a thread info + * list. If he then switches to the thread, s/he will find the task + * is in schedule, but a back trace should show the same info we come + * up with. This code was shamelessly purloined from process.c. It was + * then enhanced to provide more registers than simply the program + * counter. + */ + + if (!thread_list) { + return; + } + + if (p->state == TASK_RUNNING) + return; + stack_page = (unsigned long) p->thread_info; + if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > 8188 + stack_page) + return; + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + do { + if (gdb_regs[_EBP] < stack_page || + gdb_regs[_EBP] > 8184 + stack_page) + return; + gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); + gdb_regs[_ESP] = gdb_regs[_EBP] + 8; + gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; + if (gdb_regs[_PC] < first_sched || gdb_regs[_PC] >= last_sched) + return; + } while (count++ < 16); + return; +} + +/* Indicate to caller of mem2hex or hex2mem that there has been an + error. */ +static volatile int mem_err = 0; +static volatile int mem_err_expected = 0; +static volatile int mem_err_cnt = 0; +static int garbage_loc = -1; + +int +get_char(char *addr) +{ + return *addr; +} + +void +set_char(char *addr, int val, int may_fault) +{ + /* + * This code traps references to the area mapped to the kernel + * stack as given by the regs and, instead, stores to the + * fn_call_lookaside[cpu].array + */ + if (may_fault && + (unsigned int) addr < OLD_esp && + ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { + addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); + } + *addr = val; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* return a pointer to the last char put in buf (null) */ +/* If MAY_FAULT is non-zero, then we should set mem_err in response to + a fault; if zero treat a fault like any other fault in the stub. */ +char * +mem2hex(char *mem, char *buf, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + /* printk("%lx = ", mem) ; */ + + ch = get_char(mem++); + + /* printk("%02x\n", ch & 0xFF) ; */ + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault fetching from addr %lx\n", + (long) (mem - 1)); + *buf = 0; /* truncate buffer */ + return (buf); + } + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (may_fault) + mem_err_expected = 0; + return (buf); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return a pointer to the character AFTER the last byte written */ +/* NOTE: We use the may fault flag to also indicate if the write is to + * the registers (0) or "other" memory (!=0) + */ +char * +hex2mem(char *buf, char *mem, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + ch = hex(*buf++) << 4; + ch = ch + hex(*buf++); + set_char(mem++, ch, may_fault); + + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault storing to addr %lx\n", + (long) (mem - 1)); + return (mem); + } + } + if (may_fault) + mem_err_expected = 0; + return (mem); +} + +/**********************************************/ +/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ +/* RETURN NUMBER OF CHARS PROCESSED */ +/**********************************************/ +int +hexToInt(char **ptr, int *intValue) +{ + int numChars = 0; + int hexValue; + + *intValue = 0; + + while (**ptr) { + hexValue = hex(**ptr); + if (hexValue >= 0) { + *intValue = (*intValue << 4) | hexValue; + numChars++; + } else + break; + + (*ptr)++; + } + + return (numChars); +} + +#define stubhex(h) hex(h) +#ifdef old_thread_list + +static int +stub_unpack_int(char *buff, int fieldlength) +{ + int nibble; + int retval = 0; + + while (fieldlength) { + nibble = stubhex(*buff++); + retval |= nibble; + fieldlength--; + if (fieldlength) + retval = retval << 4; + } + return retval; +} +#endif +static char * +pack_hex_byte(char *pkt, int byte) +{ + *pkt++ = hexchars[(byte >> 4) & 0xf]; + *pkt++ = hexchars[(byte & 0xf)]; + return pkt; +} + +#define BUF_THREAD_ID_SIZE 16 + +static char * +pack_threadid(char *pkt, threadref * id) +{ + char *limit; + unsigned char *altid; + + altid = (unsigned char *) id; + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *altid++); + return pkt; +} + +#ifdef old_thread_list +static char * +unpack_byte(char *buf, int *value) +{ + *value = stub_unpack_int(buf, 2); + return buf + 2; +} + +static char * +unpack_threadid(char *inbuf, threadref * id) +{ + char *altref; + char *limit = inbuf + BUF_THREAD_ID_SIZE; + int x, y; + + altref = (char *) id; + + while (inbuf < limit) { + x = stubhex(*inbuf++); + y = stubhex(*inbuf++); + *altref++ = (x << 4) | y; + } + return inbuf; +} +#endif +void +int_to_threadref(threadref * id, int value) +{ + unsigned char *scan; + + scan = (unsigned char *) id; + { + int i = 4; + while (i--) + *scan++ = 0; + } + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} +int +int_to_hex_v(unsigned char * id, int value) +{ + unsigned char *start = id; + int shift; + int ch; + + for (shift = 28; shift >= 0; shift -= 4) { + if ((ch = (value >> shift) & 0xf) || (id != start)) { + *id = hexchars[ch]; + id++; + } + } + if (id == start) + *id++ = '0'; + return id - start; +} +#ifdef old_thread_list + +static int +threadref_to_int(threadref * ref) +{ + int i, value = 0; + unsigned char *scan; + + scan = (char *) ref; + scan += 4; + i = 4; + while (i-- > 0) + value = (value << 8) | ((*scan++) & 0xff); + return value; +} +#endif +static int +cmp_str(char *s1, char *s2, int count) +{ + while (count--) { + if (*s1++ != *s2++) + return 0; + } + return 1; +} + +#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ +extern struct task_struct *kgdb_get_idle(int cpu); +#define idle_task(cpu) kgdb_get_idle(cpu) +#else +#define idle_task(cpu) init_tasks[cpu] +#endif + +extern int kgdb_pid_init_done; + +struct task_struct * +getthread(int pid) +{ + struct task_struct *thread; + if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { + + return idle_task(pid - PID_MAX); + } else { + /* + * find_task_by_pid is relatively safe all the time + * Other pid functions require lock downs which imply + * that we may be interrupting them (as we get here + * in the middle of most any lock down). + * Still we don't want to call until the table exists! + */ + if (kgdb_pid_init_done){ + thread = find_task_by_pid(pid); + if (thread) { + return thread; + } + } + } + return NULL; +} +/* *INDENT-OFF* */ +struct hw_breakpoint { + unsigned enabled; + unsigned type; + unsigned len; + unsigned addr; +} breakinfo[4] = { {enabled:0}, + {enabled:0}, + {enabled:0}, + {enabled:0}}; +/* *INDENT-ON* */ +unsigned hw_breakpoint_status; +void +correct_hw_break(void) +{ + int breakno; + int correctit; + int breakbit; + unsigned dr7; + + asm volatile ("movl %%db7, %0\n":"=r" (dr7) + :); + /* *INDENT-OFF* */ + do { + unsigned addr0, addr1, addr2, addr3; + asm volatile ("movl %%db0, %0\n" + "movl %%db1, %1\n" + "movl %%db2, %2\n" + "movl %%db3, %3\n" + :"=r" (addr0), "=r"(addr1), + "=r"(addr2), "=r"(addr3) + :); + } while (0); + /* *INDENT-ON* */ + correctit = 0; + for (breakno = 0; breakno < 3; breakno++) { + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + dr7 |= (((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << 16) << + (breakno << 2); + switch (breakno) { + case 0: + asm volatile ("movl %0, %%dr0\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 1: + asm volatile ("movl %0, %%dr1\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 2: + asm volatile ("movl %0, %%dr2\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 3: + asm volatile ("movl %0, %%dr3\n"::"r" + (breakinfo[breakno].addr)); + break; + } + } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + } + } + if (correctit) { + asm volatile ("movl %0, %%db7\n"::"r" (dr7)); + } +} + +int +remove_hw_break(unsigned breakno) +{ + if (!breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 0; + return 0; +} + +int +set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) +{ + if (breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 1; + breakinfo[breakno].type = type; + breakinfo[breakno].len = len; + breakinfo[breakno].addr = addr; + return 0; +} + +#ifdef CONFIG_SMP +static int in_kgdb_console = 0; + +int +in_kgdb(struct pt_regs *regs) +{ + unsigned flags; + int cpu = smp_processor_id(); + in_kgdb_called = 1; + if (!spin_is_locked(&kgdb_spinlock)) { + if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ + in_kgdb_console) { /* or we are doing slow i/o */ + return 1; + } + return 0; + } + + /* As I see it the only reason not to let all cpus spin on + * the same spin_lock is to allow selected ones to proceed. + * This would be a good thing, so we leave it this way. + * Maybe someday.... Done ! + + * in_kgdb() is called from an NMI so we don't pretend + * to have any resources, like printk() for example. + */ + + kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ + /* + * log arival of this cpu + * The NMI keeps on ticking. Protect against recurring more + * than once, and ignor the cpu that has the kgdb lock + */ + in_kgdb_entry_log[cpu]++; + in_kgdb_here_log[cpu] = regs; + if (cpu == spinlock_cpu || waiting_cpus[cpu].task) + goto exit_in_kgdb; + + /* + * For protection of the initilization of the spin locks by kgdb + * it locks the kgdb spinlock before it gets the wait locks set + * up. We wait here for the wait lock to be taken. If the + * kgdb lock goes away first?? Well, it could be a slow exit + * sequence where the wait lock is removed prior to the kgdb lock + * so if kgdb gets unlocked, we just exit. + */ + + while (spin_is_locked(&kgdb_spinlock) && + !spin_is_locked(waitlocks + cpu)) ; + if (!spin_is_locked(&kgdb_spinlock)) + goto exit_in_kgdb; + + waiting_cpus[cpu].task = current; + waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu); + waiting_cpus[cpu].regs = regs; + + spin_unlock_wait(waitlocks + cpu); + + /* + * log departure of this cpu + */ + waiting_cpus[cpu].task = 0; + waiting_cpus[cpu].pid = 0; + waiting_cpus[cpu].regs = 0; + correct_hw_break(); + exit_in_kgdb: + in_kgdb_here_log[cpu] = 0; + kgdb_local_irq_restore(flags); + return 1; + /* + spin_unlock(continuelocks + smp_processor_id()); + */ +} + +void +smp__in_kgdb(struct pt_regs regs) +{ + ack_APIC_irq(); + in_kgdb(®s); +} +#else +int +in_kgdb(struct pt_regs *regs) +{ + return (kgdb_spinlock); +} +#endif + +void +printexceptioninfo(int exceptionNo, int errorcode, char *buffer) +{ + unsigned dr6; + int i; + switch (exceptionNo) { + case 1: /* debug exception */ + break; + case 3: /* breakpoint */ + sprintf(buffer, "Software breakpoint"); + return; + default: + sprintf(buffer, "Details not available"); + return; + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (dr6 & 0x4000) { + sprintf(buffer, "Single step"); + return; + } + for (i = 0; i < 4; ++i) { + if (dr6 & (1 << i)) { + sprintf(buffer, "Hardware breakpoint %d", i); + return; + } + } + sprintf(buffer, "Unknown trap"); + return; +} + +/* + * This function does all command procesing for interfacing to gdb. + * + * NOTE: The INT nn instruction leaves the state of the interrupt + * enable flag UNCHANGED. That means that when this routine + * is entered via a breakpoint (INT 3) instruction from code + * that has interrupts enabled, then interrupts will STILL BE + * enabled when this routine is entered. The first thing that + * we do here is disable interrupts so as to prevent recursive + * entries and bothersome serial interrupts while we are + * trying to run the serial port in polled mode. + * + * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so + * it is always necessary to do a restore_flags before returning + * so as to let go of that lock. + */ +int +kgdb_handle_exception(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs) +{ + struct task_struct *usethread = NULL; + struct task_struct *thread_list_start = 0, *thread = NULL; + int addr, length; + int breakno, breaktype; + char *ptr; + int newPC; + threadref thref; + int threadid; + int thread_min = PID_MAX + MAX_NO_CPUS; +#ifdef old_thread_list + int maxthreads; +#endif + int nothreads; + unsigned long flags; + int gdb_regs[NUMREGBYTES / 4]; + int dr6; + IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ +#define NO_NMI 1 +#define NO_SYNC 2 +#define regs (*linux_regs) +#define NUMREGS NUMREGBYTES/4 + /* + * If the entry is not from the kernel then return to the Linux + * trap handler and let it process the interrupt normally. + */ + if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { + printk("ignoring non-kernel exception\n"); + print_regs(®s); + return (0); + } + /* + * If we're using eth mode, set the 'mode' in the netdevice. + */ + + if (kgdboe) + netpoll_set_trap(1); + + kgdb_local_irq_save(flags); + + /* Get kgdb spinlock */ + + KGDB_SPIN_LOCK(&kgdb_spinlock); + rdtscll(kgdb_info.entry_tsc); + /* + * We depend on this spinlock and the NMI watch dog to control the + * other cpus. They will arrive at "in_kgdb()" as a result of the + * NMI and will wait there for the following spin locks to be + * released. + */ +#ifdef CONFIG_SMP + +#if 0 + if (cpu_callout_map & ~MAX_CPU_MASK) { + printk("kgdb : too many cpus, possibly not mapped" + " in contiguous space, change MAX_NO_CPUS" + " in kgdb_stub and make new kernel.\n" + " cpu_callout_map is %lx\n", cpu_callout_map); + goto exit_just_unlock; + } +#endif + if (spinlock_count == 1) { + int time = 0, end_time, dum = 0; + int i; + int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) + }; + if (remote_debug) { + printk("kgdb : cpu %d entry, syncing others\n", + smp_processor_id()); + } + for (i = 0; i < MAX_NO_CPUS; i++) { + /* + * Use trylock as we may already hold the lock if + * we are holding the cpu. Net result is all + * locked. + */ + spin_trylock(&waitlocks[i]); + } + for (i = 0; i < MAX_NO_CPUS; i++) + cpu_logged_in[i] = 0; + /* + * Wait for their arrival. We know the watch dog is active if + * in_kgdb() has ever been called, as it is always called on a + * watchdog tick. + */ + rdtsc(dum, time); + end_time = time + 2; /* Note: we use the High order bits! */ + i = 1; + if (num_online_cpus() > 1) { + int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; + smp_send_nmi_allbutself(); + + while (i < num_online_cpus() && time != end_time) { + int j; + for (j = 0; j < MAX_NO_CPUS; j++) { + if (waiting_cpus[j].task && + waiting_cpus[j].task != NOCPU && + !cpu_logged_in[j]) { + i++; + cpu_logged_in[j] = 1; + if (remote_debug) { + printk + ("kgdb : cpu %d arrived at kgdb\n", + j); + } + break; + } else if (!waiting_cpus[j].task && + !cpu_online(j)) { + waiting_cpus[j].task = NOCPU; + cpu_logged_in[j] = 1; + waiting_cpus[j].hold = 1; + break; + } + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + + int wait = 100000; + while (wait--) ; + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + printk + ("kgdb : cpu %d stall" + " in in_kgdb\n", + j); + i++; + cpu_logged_in[j] = 1; + waiting_cpus[j].task = + (struct task_struct + *) 1; + } + } + } + + if (in_kgdb_entry_log[smp_processor_id()] > + (me_in_kgdb + 10)) { + break; + } + + rdtsc(dum, time); + } + if (i < num_online_cpus()) { + printk + ("kgdb : time out, proceeding without sync\n"); +#if 0 + printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", + waiting_cpus[0].task != 0, + waiting_cpus[1].task != 0); + printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", + cpu_logged_in[0], cpu_logged_in[1]); + printk + ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", + in_kgdb_here_log[0] != 0, + in_kgdb_here_log[1] != 0); +#endif + entry_state = NO_SYNC; + } else { +#if 0 + int ent = + in_kgdb_entry_log[smp_processor_id()] - + me_in_kgdb; + printk("kgdb : sync after %d entries\n", ent); +#endif + } + } else { + if (remote_debug) { + printk + ("kgdb : %d cpus, but watchdog not active\n" + "proceeding without locking down other cpus\n", + num_online_cpus()); + entry_state = NO_NMI; + } + } + } +#endif + + if (remote_debug) { + unsigned long *lp = (unsigned long *) &linux_regs; + + printk("handle_exception(exceptionVector=%d, " + "signo=%d, err_code=%d, linux_regs=%p)\n", + exceptionVector, signo, err_code, linux_regs); + if (debug_regs) { + print_regs(®s); + printk("Stk: %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[0], lp[1], lp[2], lp[3], + lp[4], lp[5], lp[6], lp[7]); + printk(" %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[8], lp[9], lp[10], lp[11], + lp[12], lp[13], lp[14], lp[15]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[16], lp[17], lp[18], lp[19], + lp[20], lp[21], lp[22], lp[23]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[24], lp[25], lp[26], lp[27], + lp[28], lp[29], lp[30], lp[31]); + } + } + + /* Disable hardware debugging while we are in kgdb */ + /* Get the debug register status register */ +/* *INDENT-OFF* */ + __asm__("movl %0,%%db7" + : /* no output */ + :"r"(0)); + + asm volatile ("movl %%db6, %0\n" + :"=r" (hw_breakpoint_status) + :); + +/* *INDENT-ON* */ + switch (exceptionVector) { + case 0: /* divide error */ + case 1: /* debug exception */ + case 2: /* NMI */ + case 3: /* breakpoint */ + case 4: /* overflow */ + case 5: /* bounds check */ + case 6: /* invalid opcode */ + case 7: /* device not available */ + case 8: /* double fault (errcode) */ + case 10: /* invalid TSS (errcode) */ + case 12: /* stack fault (errcode) */ + case 16: /* floating point error */ + case 17: /* alignment check (errcode) */ + default: /* any undocumented */ + break; + case 11: /* segment not present (errcode) */ + case 13: /* general protection (errcode) */ + case 14: /* page fault (special errcode) */ + case 19: /* cache flush denied */ + if (mem_err_expected) { + /* + * This fault occured because of the + * get_char or set_char routines. These + * two routines use either eax of edx to + * indirectly reference the location in + * memory that they are working with. + * For a page fault, when we return the + * instruction will be retried, so we + * have to make sure that these + * registers point to valid memory. + */ + mem_err = 1; /* set mem error flag */ + mem_err_expected = 0; + mem_err_cnt++; /* helps in debugging */ + /* make valid address */ + regs.eax = (long) &garbage_loc; + /* make valid address */ + regs.edx = (long) &garbage_loc; + if (remote_debug) + printk("Return after memory error: " + "mem_err_cnt=%d\n", mem_err_cnt); + if (debug_regs) + print_regs(®s); + goto exit_kgdb; + } + break; + } + if (remote_debug) + printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); + + gdb_i386vector = exceptionVector; + gdb_i386errcode = err_code; + kgdb_info.called_from = __builtin_return_address(0); +#ifdef CONFIG_SMP + /* + * OK, we can now communicate, lets tell gdb about the sync. + * but only if we had a problem. + */ + switch (entry_state) { + case NO_NMI: + to_gdb("NMI not active, other cpus not stopped\n"); + break; + case NO_SYNC: + to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); + default:; + } + +#endif +/* + * Set up the gdb function call area. + */ + trap_cpu = smp_processor_id(); + OLD_esp = NEW_esp = (int) (&linux_regs->esp); + + IF_SMP(once_again:) + /* reply to host that an exception has occurred */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + + putpacket(remcomOutBuffer); + + while (1 == 1) { + error = 0; + remcomOutBuffer[0] = 0; + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + break; + case 'd': + remote_debug = !(remote_debug); /* toggle debug flag */ + printk("Remote debug %s\n", + remote_debug ? "on" : "off"); + break; + case 'g': /* return the value of the CPU registers */ + get_gdb_regs(usethread, ®s, gdb_regs); + mem2hex((char *) gdb_regs, + remcomOutBuffer, NUMREGBYTES, 0); + break; + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem(&remcomInBuffer[1], + (char *) gdb_regs, NUMREGBYTES, 0); + if (!usethread || usethread == current) { + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E00"); + } + break; + + case 'P':{ /* set the value of a single CPU register - + return OK */ + /* + * For some reason, gdb wants to talk about psudo + * registers (greater than 15). These may have + * meaning for ptrace, but for us it is safe to + * ignor them. We do this by dumping them into + * _GS which we also ignor, but do have memory for. + */ + int regno; + + ptr = &remcomInBuffer[1]; + regs_to_gdb_regs(gdb_regs, ®s); + if ((!usethread || usethread == current) && + hexToInt(&ptr, ®no) && + *ptr++ == '=' && (regno >= 0)) { + regno = + (regno >= NUMREGS ? _GS : regno); + hex2mem(ptr, (char *) &gdb_regs[regno], + 4, 0); + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + break; + } + strcpy(remcomOutBuffer, "E01"); + break; + } + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr) && + (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { + ptr = 0; + /* + * hex doubles the byte count + */ + if (length > (BUFMAX / 2)) + length = BUFMAX / 2; + mem2hex((char *) addr, + remcomOutBuffer, length, 1); + if (mem_err) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } + } + + if (ptr) { + strcpy(remcomOutBuffer, "E01"); + debug_error + ("malformed read memory command: %s\n", + remcomInBuffer); + } + break; + + /* MAA..AA,LLLL: + Write LLLL bytes at address AA.AA return OK */ + case 'M': + /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr) && + (*(ptr++) == ',') && + (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { + hex2mem(ptr, (char *) addr, length, 1); + + if (mem_err) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } else { + strcpy(remcomOutBuffer, "OK"); + } + + ptr = 0; + } + if (ptr) { + strcpy(remcomOutBuffer, "E02"); + debug_error + ("malformed write memory command: %s\n", + remcomInBuffer); + } + break; + case 'S': + remcomInBuffer[0] = 's'; + case 'C': + /* Csig;AA..AA where ;AA..AA is optional + * continue with signal + * Since signals are meaning less to us, delete that + * part and then fall into the 'c' code. + */ + ptr = &remcomInBuffer[1]; + length = 2; + while (*ptr && *ptr != ';') { + length++; + ptr++; + } + if (*ptr) { + do { + ptr++; + *(ptr - length++) = *ptr; + } while (*ptr); + } else { + remcomInBuffer[1] = 0; + } + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + /* D detach, reply OK and then continue */ + case 'c': + case 's': + case 'D': + + /* try to read optional parameter, + pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) { + if (remote_debug) + printk("Changing EIP to 0x%x\n", addr); + + regs.eip = addr; + } + + newPC = regs.eip; + + /* clear the trace bit */ + regs.eflags &= 0xfffffeff; + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + regs.eflags |= 0x100; + + /* detach is a friendly version of continue. Note that + debugging is still enabled (e.g hit control C) + */ + if (remcomInBuffer[0] == 'D') { + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + } + + if (remote_debug) { + printk("Resuming execution\n"); + print_regs(®s); + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno) && + (breakinfo[breakno].type == 0)) { + /* Set restore flag */ + regs.eflags |= 0x10000; + break; + } + } + } + + if (kgdboe) + netpoll_set_trap(0); + + correct_hw_break(); + asm volatile ("movl %0, %%db6\n"::"r" (0)); + goto exit_kgdb; + + /* kill the program */ + case 'k': /* do nothing */ + break; + + /* query */ + case 'q': + nothreads = 0; + switch (remcomInBuffer[1]) { + case 'f': + threadid = 1; + thread_list = 2; + thread_list_start = (usethread ? : current); + case 's': + if (!cmp_str(&remcomInBuffer[2], + "ThreadInfo", 10)) + break; + + remcomOutBuffer[nothreads++] = 'm'; + for (; threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + nothreads += int_to_hex_v( + &remcomOutBuffer[ + nothreads], + threadid); + if (thread_min > threadid) + thread_min = threadid; + remcomOutBuffer[ + nothreads] = ','; + nothreads++; + if (nothreads > BUFMAX - 10) + break; + } + } + if (remcomOutBuffer[nothreads - 1] == 'm') { + remcomOutBuffer[nothreads - 1] = 'l'; + } else { + nothreads--; + } + remcomOutBuffer[nothreads] = 0; + break; + +#ifdef old_thread_list /* Old thread info request */ + case 'L': + /* List threads */ + thread_list = 2; + thread_list_start = (usethread ? : current); + unpack_byte(remcomInBuffer + 3, &maxthreads); + unpack_threadid(remcomInBuffer + 5, &thref); + do { + int buf_thread_limit = + (BUFMAX - 22) / BUF_THREAD_ID_SIZE; + if (maxthreads > buf_thread_limit) { + maxthreads = buf_thread_limit; + } + } while (0); + remcomOutBuffer[0] = 'q'; + remcomOutBuffer[1] = 'M'; + remcomOutBuffer[4] = '0'; + pack_threadid(remcomOutBuffer + 5, &thref); + + threadid = threadref_to_int(&thref); + for (nothreads = 0; + nothreads < maxthreads && + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(remcomOutBuffer + + 21 + + nothreads * 16, + &thref); + nothreads++; + if (thread_min > threadid) + thread_min = threadid; + } + } + + if (threadid == PID_MAX + MAX_NO_CPUS) { + remcomOutBuffer[4] = '1'; + } + pack_hex_byte(remcomOutBuffer + 2, nothreads); + remcomOutBuffer[21 + nothreads * 16] = '\0'; + break; +#endif + case 'C': + /* Current thread id */ + remcomOutBuffer[0] = 'Q'; + remcomOutBuffer[1] = 'C'; + threadid = current->pid; + if (!threadid) { + /* + * idle thread + */ + for (threadid = PID_MAX; + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + if (current == + idle_task(threadid - + PID_MAX)) + break; + } + } + int_to_threadref(&thref, threadid); + pack_threadid(remcomOutBuffer + 2, &thref); + remcomOutBuffer[18] = '\0'; + break; + + case 'E': + /* Print exception info */ + printexceptioninfo(exceptionVector, + err_code, remcomOutBuffer); + break; + case 'T':{ + char * nptr; + /* Thread extra info */ + if (!cmp_str(&remcomInBuffer[2], + "hreadExtraInfo,", 15)) { + break; + } + ptr = &remcomInBuffer[17]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + nptr = &thread->comm[0]; + length = 0; + ptr = &remcomOutBuffer[0]; + do { + length++; + ptr = pack_hex_byte(ptr, *nptr++); + } while (*nptr && length < 16); + /* + * would like that 16 to be the size of + * task_struct.comm but don't know the + * syntax.. + */ + *ptr = 0; + } + } + break; + + /* task related */ + case 'H': + switch (remcomInBuffer[1]) { + case 'g': + ptr = &remcomInBuffer[2]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (!thread) { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + break; + } + /* + * Just in case I forget what this is all about, + * the "thread info" command to gdb causes it + * to ask for a thread list. It then switches + * to each thread and asks for the registers. + * For this (and only this) usage, we want to + * fudge the registers of tasks not on the run + * list (i.e. waiting) to show the routine that + * called schedule. Also, gdb, is a minimalist + * in that if the current thread is the last + * it will not re-read the info when done. + * This means that in this case we must show + * the real registers. So here is how we do it: + * Each entry we keep track of the min + * thread in the list (the last that gdb will) + * get info for. We also keep track of the + * starting thread. + * "thread_list" is cleared when switching back + * to the min thread if it is was current, or + * if it was not current, thread_list is set + * to 1. When the switch to current comes, + * if thread_list is 1, clear it, else do + * nothing. + */ + usethread = thread; + if ((thread_list == 1) && + (thread == thread_list_start)) { + thread_list = 0; + } + if (thread_list && (threadid == thread_min)) { + if (thread == thread_list_start) { + thread_list = 0; + } else { + thread_list = 1; + } + } + /* follow through */ + case 'c': + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (thread) { + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + if (thread_min > threadid) + thread_min = threadid; + } else { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + } + break; + + case 'Y': /* set up a hardware breakpoint */ + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + ptr++; + hexToInt(&ptr, &breaktype); + ptr++; + hexToInt(&ptr, &length); + ptr++; + hexToInt(&ptr, &addr); + if (set_hw_break(breakno & 0x3, + breaktype & 0x3, + length & 0x3, addr) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + /* Remove hardware breakpoint */ + case 'y': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + if (remove_hw_break(breakno & 0x3) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + case 'r': /* reboot */ + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + /*to_gdb("Rebooting\n"); */ + /* triplefault no return from here */ + { + static long no_idt[2]; + __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); + BREAKPOINT; + } + + } /* switch */ + + /* reply to the request */ + putpacket(remcomOutBuffer); + } /* while(1==1) */ + /* + * reached by goto only. + */ + exit_kgdb: + /* + * Here is where we set up to trap a gdb function call. NEW_esp + * will be changed if we are trying to do this. We handle both + * adding and subtracting, thus allowing gdb to put grung on + * the stack which it removes later. + */ + if (NEW_esp != OLD_esp) { + int *ptr = END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) + ptr -= (OLD_esp - NEW_esp) / sizeof (int); + *--ptr = linux_regs->eflags; + *--ptr = linux_regs->xcs; + *--ptr = linux_regs->eip; + *--ptr = linux_regs->ecx; + *--ptr = linux_regs->ebx; + *--ptr = linux_regs->eax; + linux_regs->ecx = NEW_esp - (sizeof (int) * 6); + linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) { + linux_regs->eip = (unsigned int) fn_call_stub; + } else { + linux_regs->eip = (unsigned int) fn_rtn_stub; + linux_regs->eax = NEW_esp; + } + linux_regs->eflags &= ~(IF_BIT | TF_BIT); + } +#ifdef CONFIG_SMP + /* + * Release gdb wait locks + * Sanity check time. Must have at least one cpu to run. Also single + * step must not be done if the current cpu is on hold. + */ + if (spinlock_count == 1) { + int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; + int cpu_avail = 0; + int i; + + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!cpu_online(i)) + break; + if (!hold_cpu(i)) { + cpu_avail = 1; + } + } + /* + * Early in the bring up there will be NO cpus on line... + */ + if (!cpu_avail && !cpus_empty(cpu_online_map)) { + to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); + goto once_again; + } + if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { + to_gdb + ("Current cpu must be unblocked to single step\n"); + goto once_again; + } + if (!(ss_hold)) { + int i; + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!hold_cpu(i)) { + spin_unlock(&waitlocks[i]); + } + } + } else { + spin_unlock(&waitlocks[smp_processor_id()]); + } + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + /* + * If this cpu is on hold, this is where we + * do it. Note, the NMI will pull us out of here, + * but will return as the above lock is not held. + * We will stay here till another cpu releases the lock for us. + */ + spin_unlock_wait(waitlocks + smp_processor_id()); + kgdb_local_irq_restore(flags); + return (0); + } +#if 0 +exit_just_unlock: +#endif +#endif + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + kgdb_local_irq_restore(flags); + return (0); +} + +/* this function is used to set up exception handlers for tracing and + * breakpoints. + * This function is not needed as the above line does all that is needed. + * We leave it for backward compatitability... + */ +void +set_debug_traps(void) +{ + /* + * linux_debug_hook is defined in traps.c. We store a pointer + * to our own exception handler into it. + + * But really folks, every hear of labeled common, an old Fortran + * concept. Lots of folks can reference it and it is define if + * anyone does. Only one can initialize it at link time. We do + * this with the hook. See the statement above. No need for any + * executable code and it is ready as soon as the kernel is + * loaded. Very desirable in kernel debugging. + + linux_debug_hook = handle_exception ; + */ + + /* In case GDB is started before us, ack any packets (presumably + "$?#xx") sitting there. + putDebugChar ('+'); + + initialized = 1; + */ +} + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. */ +/* But really, just use the BREAKPOINT macro. We will handle the int stuff + */ + +#ifdef later +/* + * possibly we should not go thru the traps.c code at all? Someday. + */ +void +do_kgdb_int3(struct pt_regs *regs, long error_code) +{ + kgdb_handle_exception(3, 5, error_code, regs); + return; +} +#endif +#undef regs +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS +asmlinkage void +bad_sys_call_exit(int stuff) +{ + struct pt_regs *regs = (struct pt_regs *) &stuff; + printk("Sys call %d return with %x preempt_count\n", + (int) regs->orig_eax, preempt_count()); +} +#endif +#ifdef CONFIG_STACK_OVERFLOW_TEST +#include +asmlinkage void +stack_overflow(void) +{ +#ifdef BREAKPOINT + BREAKPOINT; +#else + printk("Kernel stack overflow, looping forever\n"); +#endif + while (1) { + } +} +#endif + +#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) +char gdbconbuf[BUFMAX]; + +static void +kgdb_gdb_message(const char *s, unsigned count) +{ + int i; + int wcount; + char *bufptr; + /* + * This takes care of NMI while spining out chars to gdb + */ + IF_SMP(in_kgdb_console = 1); + gdbconbuf[0] = 'O'; + bufptr = gdbconbuf + 1; + while (count > 0) { + if ((count << 1) > (BUFMAX - 2)) { + wcount = (BUFMAX - 2) >> 1; + } else { + wcount = count; + } + count -= wcount; + for (i = 0; i < wcount; i++) { + bufptr = pack_hex_byte(bufptr, s[i]); + } + *bufptr = '\0'; + s += wcount; + + putpacket(gdbconbuf); + + } + IF_SMP(in_kgdb_console = 0); +} +#endif +#ifdef CONFIG_SMP +static void +to_gdb(const char *s) +{ + int count = 0; + while (s[count] && (count++ < BUFMAX)) ; + kgdb_gdb_message(s, count); +} +#endif +#ifdef CONFIG_KGDB_CONSOLE +#include +#include +#include +#include +#include + +void +kgdb_console_write(struct console *co, const char *s, unsigned count) +{ + + if (gdb_i386vector == -1) { + /* + * We have not yet talked to gdb. What to do... + * lets break, on continue we can do the write. + * But first tell him whats up. Uh, well no can do, + * as this IS the console. Oh well... + * We do need to wait or the messages will be lost. + * Other option would be to tell the above code to + * ignore this breakpoint and do an auto return, + * but that might confuse gdb. Also this happens + * early enough in boot up that we don't have the traps + * set up yet, so... + */ + breakpoint(); + } + kgdb_gdb_message(s, count); +} + +/* + * ------------------------------------------------------------ + * Serial KGDB driver + * ------------------------------------------------------------ + */ + +static struct console kgdbcons = { + name:"kgdb", + write:kgdb_console_write, +#ifdef CONFIG_KGDB_USER_CONSOLE + device:kgdb_console_device, +#endif + flags:CON_PRINTBUFFER | CON_ENABLED, + index:-1, +}; + +/* + * The trick here is that this file gets linked before printk.o + * That means we get to peer at the console info in the command + * line before it does. If we are up, we register, otherwise, + * do nothing. By returning 0, we allow printk to look also. + */ +static int kgdb_console_enabled; + +int __init +kgdb_console_init(char *str) +{ + if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { + register_console(&kgdbcons); + kgdb_console_enabled = 1; + } + return 0; /* let others look at the string */ +} + +__setup("console=", kgdb_console_init); + +#ifdef CONFIG_KGDB_USER_CONSOLE +static kdev_t kgdb_console_device(struct console *c); +/* This stuff sort of works, but it knocks out telnet devices + * we are leaving it here in case we (or you) find time to figure it out + * better.. + */ + +/* + * We need a real char device as well for when the console is opened for user + * space activities. + */ + +static int +kgdb_consdev_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t +kgdb_consdev_write(struct file *file, const char *buf, + size_t count, loff_t * ppos) +{ + int size, ret = 0; + static char kbuf[128]; + static DECLARE_MUTEX(sem); + + /* We are not reentrant... */ + if (down_interruptible(&sem)) + return -ERESTARTSYS; + + while (count > 0) { + /* need to copy the data from user space */ + size = count; + if (size > sizeof (kbuf)) + size = sizeof (kbuf); + if (copy_from_user(kbuf, buf, size)) { + ret = -EFAULT; + break;; + } + kgdb_console_write(&kgdbcons, kbuf, size); + count -= size; + ret += size; + buf += size; + } + + up(&sem); + + return ret; +} + +struct file_operations kgdb_consdev_fops = { + open:kgdb_consdev_open, + write:kgdb_consdev_write +}; +static kdev_t +kgdb_console_device(struct console *c) +{ + return MKDEV(TTYAUX_MAJOR, 1); +} + +/* + * This routine gets called from the serial stub in the i386/lib + * This is so it is done late in bring up (just before the console open). + */ +void +kgdb_console_finit(void) +{ + if (kgdb_console_enabled) { + char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); + char *cp = cptr; + while (*cptr && *cptr != '(') + cptr++; + *cptr = 0; + unregister_chrdev(TTYAUX_MAJOR, cp); + register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); + } +} +#endif +#endif +#ifdef CONFIG_KGDB_TS +#include /* time stamp code */ +#include /* in_interrupt */ +#ifdef CONFIG_KGDB_TS_64 +#define DATA_POINTS 64 +#endif +#ifdef CONFIG_KGDB_TS_128 +#define DATA_POINTS 128 +#endif +#ifdef CONFIG_KGDB_TS_256 +#define DATA_POINTS 256 +#endif +#ifdef CONFIG_KGDB_TS_512 +#define DATA_POINTS 512 +#endif +#ifdef CONFIG_KGDB_TS_1024 +#define DATA_POINTS 1024 +#endif +#ifndef DATA_POINTS +#define DATA_POINTS 128 /* must be a power of two */ +#endif +#define INDEX_MASK (DATA_POINTS - 1) +#if (INDEX_MASK & DATA_POINTS) +#error "CONFIG_KGDB_TS_COUNT must be a power of 2" +#endif +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + int data0; + int data1; +}; +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + struct task_struct *t1; + struct task_struct *t2; +}; +struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; + +struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; +int kgdb_and_then_count; + +void +kgdb_tstamp(int line, char *source, int data0, int data1) +{ + static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; + int flags; + kgdb_local_irq_save(flags); + spin_lock(&ts_spin); + rdtscll(kgdb_and_then->at_time); +#ifdef CONFIG_SMP + kgdb_and_then->on_cpu = smp_processor_id(); +#endif + kgdb_and_then->task = current; + kgdb_and_then->from_ln = line; + kgdb_and_then->in_src = source; + kgdb_and_then->from = __builtin_return_address(0); + kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | + (preempt_count() << 8)); + kgdb_and_then->data0 = data0; + kgdb_and_then->data1 = data1; + kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; + spin_unlock(&ts_spin); + kgdb_local_irq_restore(flags); +#ifdef CONFIG_PREEMPT + +#endif + return; +} +#endif +typedef int gdb_debug_hook(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs); +gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ + +static int kgdb_need_breakpoint[NR_CPUS]; + +void kgdb_schedule_breakpoint(void) +{ + kgdb_need_breakpoint[smp_processor_id()] = 1; +} + +void kgdb_process_breakpoint(void) +{ + /* + * Handle a breakpoint queued from inside network driver code + * to avoid reentrancy issues + */ + if (kgdb_need_breakpoint[smp_processor_id()]) { + kgdb_need_breakpoint[smp_processor_id()] = 0; + BREAKPOINT; + } +} + --- linux-2.6.3/arch/i386/kernel/ldt.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/ldt.c 2004-02-17 22:26:07.000000000 -0800 @@ -2,7 +2,7 @@ * linux/kernel/ldt.c * * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar + * Copyright (C) 1999, 2003 Ingo Molnar */ #include @@ -18,6 +18,8 @@ #include #include #include +#include +#include #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ static void flush_ldt(void *null) @@ -29,34 +31,31 @@ static void flush_ldt(void *null) static int alloc_ldt(mm_context_t *pc, int mincount, int reload) { - void *oldldt; - void *newldt; - int oldsize; + int oldsize, newsize, i; if (mincount <= pc->size) return 0; + /* + * LDT got larger - reallocate if necessary. + */ oldsize = pc->size; mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - pc->ldt = newldt; - wmb(); + newsize = mincount*LDT_ENTRY_SIZE; + for (i = 0; i < newsize; i += PAGE_SIZE) { + int nr = i/PAGE_SIZE; + BUG_ON(i >= 64*1024); + if (!pc->ldt_pages[nr]) { + pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER); + if (!pc->ldt_pages[nr]) + return -ENOMEM; + clear_highpage(pc->ldt_pages[nr]); + } + } pc->size = mincount; - wmb(); - if (reload) { #ifdef CONFIG_SMP cpumask_t mask; + preempt_disable(); load_LDT(pc); mask = cpumask_of_cpu(smp_processor_id()); @@ -67,21 +66,20 @@ static int alloc_ldt(mm_context_t *pc, i load_LDT(pc); #endif } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } return 0; } static inline int copy_ldt(mm_context_t *new, mm_context_t *old) { - int err = alloc_ldt(new, old->size, 0); - if (err < 0) + int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE; + + err = alloc_ldt(new, size, 0); + if (err < 0) { + new->size = 0; return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); + } + for (i = 0; i < nr_pages; i++) + copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0); return 0; } @@ -96,6 +94,7 @@ int init_new_context(struct task_struct init_MUTEX(&mm->context.sem); mm->context.size = 0; + memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES); old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { down(&old_mm->context.sem); @@ -107,23 +106,21 @@ int init_new_context(struct task_struct /* * No need to lock the MM as we are the last user + * Do not touch the ldt register, we are already + * in the next thread. */ void destroy_context(struct mm_struct *mm) { - if (mm->context.size) { - if (mm == current->active_mm) - clear_LDT(); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } + int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; + + for (i = 0; i < nr_pages; i++) + __free_page(mm->context.ldt_pages[i]); + mm->context.size = 0; } static int read_ldt(void __user * ptr, unsigned long bytecount) { - int err; + int err, i; unsigned long size; struct mm_struct * mm = current->mm; @@ -138,8 +135,25 @@ static int read_ldt(void __user * ptr, u size = bytecount; err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; + /* + * This is necessary just in case we got here straight from a + * context-switch where the ptes were set but no tlb flush + * was done yet. We rather avoid doing a TLB flush in the + * context-switch path and do it here instead. + */ + __flush_tlb_global(); + + for (i = 0; i < size; i += PAGE_SIZE) { + int nr = i / PAGE_SIZE, bytes; + char *kaddr = kmap(mm->context.ldt_pages[nr]); + + bytes = size - i; + if (bytes > PAGE_SIZE) + bytes = PAGE_SIZE; + if (copy_to_user(ptr + i, kaddr, size - i)) + err = -EFAULT; + kunmap(mm->context.ldt_pages[nr]); + } up(&mm->context.sem); if (err < 0) return err; @@ -158,7 +172,7 @@ static int read_default_ldt(void __user err = 0; address = &default_ldt[0]; - size = 5*sizeof(struct desc_struct); + size = 5*LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -200,7 +214,15 @@ static int write_ldt(void __user * ptr, goto out_unlock; } - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); + /* + * No rescheduling allowed from this point to the install. + * + * We do a TLB flush for the same reason as in the read_ldt() path. + */ + preempt_disable(); + __flush_tlb_global(); + lp = (__u32 *) ((ldt_info.entry_number << 3) + + (char *) __kmap_atomic_vaddr(KM_LDT_PAGE0)); /* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { @@ -221,6 +243,7 @@ install: *lp = entry_1; *(lp+1) = entry_2; error = 0; + preempt_enable(); out_unlock: up(&mm->context.sem); @@ -248,3 +271,26 @@ asmlinkage int sys_modify_ldt(int func, } return ret; } + +/* + * load one particular LDT into the current CPU + */ +void load_LDT_nolock(mm_context_t *pc, int cpu) +{ + struct page **pages = pc->ldt_pages; + int count = pc->size; + int nr_pages, i; + + if (likely(!count)) { + pages = &default_ldt_page; + count = 5; + } + nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; + + for (i = 0; i < nr_pages; i++) { + __kunmap_atomic_type(KM_LDT_PAGE0 - i); + __kmap_atomic(pages[i], KM_LDT_PAGE0 - i); + } + set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count); + load_LDT_desc(); +} --- linux-2.6.3/arch/i386/kernel/Makefile 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/Makefile 2004-02-17 22:26:07.000000000 -0800 @@ -7,13 +7,14 @@ extra-y := head.o init_task.o vmlinux.ld obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - doublefault.o + doublefault.o entry_trampoline.o obj-y += cpu/ obj-y += timers/ obj-$(CONFIG_ACPI_BOOT) += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o +obj-$(CONFIG_KGDB) += kgdb_stub.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o @@ -31,6 +32,7 @@ obj-y += sysenter.o vsyscall.o obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o obj-$(CONFIG_EFI) += efi.o efi_stub.o +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o EXTRA_AFLAGS := -traditional --- linux-2.6.3/arch/i386/kernel/microcode.c 2003-10-25 14:45:44.000000000 -0700 +++ 25/arch/i386/kernel/microcode.c 2004-02-17 22:25:11.000000000 -0800 @@ -507,3 +507,4 @@ static void __exit microcode_exit (void) module_init(microcode_init) module_exit(microcode_exit) +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); --- linux-2.6.3/arch/i386/kernel/mpparse.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/kernel/mpparse.c 2004-02-17 22:26:07.000000000 -0800 @@ -668,7 +668,7 @@ void __init get_smp_config (void) * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc((void *)mpf->mpf_physptr)) { + if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) { smp_found_config = 0; printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); --- linux-2.6.3/arch/i386/kernel/nmi.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/nmi.c 2004-02-17 22:25:47.000000000 -0800 @@ -31,7 +31,16 @@ #include #include +#ifdef CONFIG_KGDB +#include +#ifdef CONFIG_SMP +unsigned int nmi_watchdog = NMI_IO_APIC; +#else +unsigned int nmi_watchdog = NMI_LOCAL_APIC; +#endif +#else unsigned int nmi_watchdog = NMI_NONE; +#endif static unsigned int nmi_hz = HZ; unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ extern void show_registers(struct pt_regs *regs); @@ -42,7 +51,7 @@ extern void show_registers(struct pt_reg * be enabled * -1: the lapic NMI watchdog is disabled, but can be enabled */ -static int nmi_active; +int nmi_active; #define K7_EVNTSEL_ENABLE (1 << 22) #define K7_EVNTSEL_INT (1 << 20) @@ -408,6 +417,9 @@ void touch_nmi_watchdog (void) for (i = 0; i < NR_CPUS; i++) alert_counter[i] = 0; } +#ifdef CONFIG_KGDB +int tune_watchdog = 5*HZ; +#endif void nmi_watchdog_tick (struct pt_regs * regs) { @@ -421,12 +433,24 @@ void nmi_watchdog_tick (struct pt_regs * sum = irq_stat[cpu].apic_timer_irqs; +#ifdef CONFIG_KGDB + if (! in_kgdb(regs) && last_irq_sums[cpu] == sum ) { + +#else if (last_irq_sums[cpu] == sum) { +#endif /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; +#ifdef CONFIG_KGDB + if (alert_counter[cpu] == tune_watchdog) { + kgdb_handle_exception(2, SIGPWR, 0, regs); + last_irq_sums[cpu] = sum; + alert_counter[cpu] = 0; + } +#endif if (alert_counter[cpu] == 5*nmi_hz) { spin_lock(&nmi_print_lock); /* @@ -462,6 +486,7 @@ void nmi_watchdog_tick (struct pt_regs * } } +EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(disable_lapic_nmi_watchdog); EXPORT_SYMBOL(enable_lapic_nmi_watchdog); --- linux-2.6.3/arch/i386/kernel/process.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/kernel/process.c 2004-02-17 22:26:07.000000000 -0800 @@ -47,6 +47,7 @@ #include #include #include +#include #ifdef CONFIG_MATH_EMULATION #include #endif @@ -304,6 +305,9 @@ void flush_thread(void) struct task_struct *tsk = current; memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); +#ifdef CONFIG_X86_HIGH_ENTRY + clear_thread_flag(TIF_DB7); +#endif memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. @@ -317,9 +321,8 @@ void release_thread(struct task_struct * if (dead_task->mm) { // temporary debugging check if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", + printk("WARNING: dead process %8s still has LDT? <%d>\n", dead_task->comm, - dead_task->mm->context.ldt, dead_task->mm->context.size); BUG(); } @@ -354,7 +357,17 @@ int copy_thread(int nr, unsigned long cl p->thread.esp = (unsigned long) childregs; p->thread.esp0 = (unsigned long) (childregs+1); + /* + * get the two stack pages, for the virtual stack. + * + * IMPORTANT: this code relies on the fact that the task + * structure is an 8K aligned piece of physical memory. + */ + p->thread.stack_page0 = virt_to_page((unsigned long)p->thread_info); + p->thread.stack_page1 = virt_to_page((unsigned long)p->thread_info + PAGE_SIZE); + p->thread.eip = (unsigned long) ret_from_fork; + p->thread_info->real_stack = p->thread_info; savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); @@ -506,10 +519,41 @@ struct task_struct * __switch_to(struct __unlazy_fpu(prev_p); +#ifdef CONFIG_X86_HIGH_ENTRY + /* + * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is + * needed because otherwise NMIs could interrupt the + * user-return code with a virtual stack and stale TLBs.) + */ + __kunmap_atomic_type(KM_VSTACK0); + __kunmap_atomic_type(KM_VSTACK1); + __kmap_atomic(next->stack_page0, KM_VSTACK0); + __kmap_atomic(next->stack_page1, KM_VSTACK1); + + /* + * NOTE: here we rely on the task being the stack as well + */ + next_p->thread_info->virtual_stack = + (void *)__kmap_atomic_vaddr(KM_VSTACK0); + +#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) + /* + * If next was preempted on entry from userspace to kernel, + * and now it's on a different cpu, we need to adjust %esp. + * This assumes that entry.S does not copy %esp while on the + * virtual stack (with interrupts enabled): which is so, + * except within __SWITCH_KERNELSPACE itself. + */ + if (unlikely(next->esp >= TASK_SIZE)) { + next->esp &= THREAD_SIZE - 1; + next->esp |= (unsigned long) next_p->thread_info->virtual_stack; + } +#endif +#endif /* * Reload esp0, LDT and the page table pointer: */ - load_esp0(tss, next); + load_virtual_esp0(tss, next_p); /* * Load the per-thread Thread-Local Storage descriptor. @@ -637,6 +681,8 @@ extern void scheduling_functions_start_h extern void scheduling_functions_end_here(void); #define first_sched ((unsigned long) scheduling_functions_start_here) #define last_sched ((unsigned long) scheduling_functions_end_here) +#define top_esp (THREAD_SIZE - sizeof(unsigned long)) +#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) unsigned long get_wchan(struct task_struct *p) { @@ -647,12 +693,12 @@ unsigned long get_wchan(struct task_stru return 0; stack_page = (unsigned long)p->thread_info; esp = p->thread.esp; - if (!stack_page || esp < stack_page || esp > 8188+stack_page) + if (!stack_page || esp < stack_page || esp > top_esp+stack_page) return 0; /* include/asm-i386/system.h:switch_to() pushes ebp last. */ ebp = *(unsigned long *) esp; do { - if (ebp < stack_page || ebp > 8184+stack_page) + if (ebp < stack_page || ebp > top_ebp+stack_page) return 0; eip = *(unsigned long *) (ebp+4); if (eip < first_sched || eip >= last_sched) --- linux-2.6.3/arch/i386/kernel/reboot.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/reboot.c 2004-02-17 22:26:07.000000000 -0800 @@ -155,12 +155,11 @@ void machine_real_restart(unsigned char CMOS_WRITE(0x00, 0x8f); spin_unlock_irqrestore(&rtc_lock, flags); - /* Remap the kernel at virtual address zero, as well as offset zero - from the kernel segment. This assumes the kernel segment starts at - virtual address PAGE_OFFSET. */ - - memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); + /* + * Remap the first 16 MB of RAM (which includes the kernel image) + * at virtual address zero: + */ + setup_identity_mappings(swapper_pg_dir, 0, 16*1024*1024); /* * Use `swapper_pg_dir' as our page directory. --- linux-2.6.3/arch/i386/kernel/setup.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/kernel/setup.c 2004-02-17 22:08:46.000000000 -0800 @@ -1118,6 +1118,19 @@ void __init setup_arch(char **cmdline_p) #endif paging_init(); +#ifdef CONFIG_EARLY_PRINTK + { + char *s = strstr(*cmdline_p, "earlyprintk="); + if (s) { + extern void setup_early_printk(char *); + + setup_early_printk(s); + printk("early console enabled\n"); + } + } +#endif + + dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH --- linux-2.6.3/arch/i386/kernel/signal.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/i386/kernel/signal.c 2004-02-17 22:26:07.000000000 -0800 @@ -128,28 +128,29 @@ sys_sigaltstack(const stack_t __user *us */ static int -restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax) +restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *__sc, int *peax) { - unsigned int err = 0; + struct sigcontext scratch; /* 88 bytes of scratch area */ /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#define COPY(x) err |= __get_user(regs->x, &sc->x) + if (copy_from_user(&scratch, __sc, sizeof(scratch))) + return -EFAULT; + +#define COPY(x) regs->x = scratch.x #define COPY_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + { unsigned short tmp = scratch.seg; \ regs->x##seg = tmp; } #define COPY_SEG_STRICT(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + { unsigned short tmp = scratch.seg; \ regs->x##seg = tmp|3; } #define GET_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + { unsigned short tmp = scratch.seg; \ loadsegment(seg,tmp); } GET_SEG(gs); @@ -168,27 +169,23 @@ restore_sigcontext(struct pt_regs *regs, COPY_SEG_STRICT(ss); { - unsigned int tmpflags; - err |= __get_user(tmpflags, &sc->eflags); + unsigned int tmpflags = scratch.eflags; regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); regs->orig_eax = -1; /* disable syscall checks */ } { - struct _fpstate __user * buf; - err |= __get_user(buf, &sc->fpstate); + struct _fpstate * buf = scratch.fpstate; if (buf) { if (verify_area(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); + return -EFAULT; + if (restore_i387(buf)) + return -EFAULT; } } - err |= __get_user(*peax, &sc->eax); - return err; - -badframe: - return 1; + *peax = scratch.eax; + return 0; } asmlinkage int sys_sigreturn(unsigned long __unused) @@ -266,46 +263,47 @@ badframe: */ static int -setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, +setup_sigcontext(struct sigcontext __user *__sc, struct _fpstate __user *fpstate, struct pt_regs *regs, unsigned long mask) { - int tmp, err = 0; + struct sigcontext sc; /* 88 bytes of scratch area */ + int tmp; tmp = 0; __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); - err |= __put_user(tmp, (unsigned int *)&sc->gs); + *(unsigned int *)&sc.gs = tmp; __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); - err |= __put_user(tmp, (unsigned int *)&sc->fs); - - err |= __put_user(regs->xes, (unsigned int *)&sc->es); - err |= __put_user(regs->xds, (unsigned int *)&sc->ds); - err |= __put_user(regs->edi, &sc->edi); - err |= __put_user(regs->esi, &sc->esi); - err |= __put_user(regs->ebp, &sc->ebp); - err |= __put_user(regs->esp, &sc->esp); - err |= __put_user(regs->ebx, &sc->ebx); - err |= __put_user(regs->edx, &sc->edx); - err |= __put_user(regs->ecx, &sc->ecx); - err |= __put_user(regs->eax, &sc->eax); - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->eip, &sc->eip); - err |= __put_user(regs->xcs, (unsigned int *)&sc->cs); - err |= __put_user(regs->eflags, &sc->eflags); - err |= __put_user(regs->esp, &sc->esp_at_signal); - err |= __put_user(regs->xss, (unsigned int *)&sc->ss); + *(unsigned int *)&sc.fs = tmp; + *(unsigned int *)&sc.es = regs->xes; + *(unsigned int *)&sc.ds = regs->xds; + sc.edi = regs->edi; + sc.esi = regs->esi; + sc.ebp = regs->ebp; + sc.esp = regs->esp; + sc.ebx = regs->ebx; + sc.edx = regs->edx; + sc.ecx = regs->ecx; + sc.eax = regs->eax; + sc.trapno = current->thread.trap_no; + sc.err = current->thread.error_code; + sc.eip = regs->eip; + *(unsigned int *)&sc.cs = regs->xcs; + sc.eflags = regs->eflags; + sc.esp_at_signal = regs->esp; + *(unsigned int *)&sc.ss = regs->xss; tmp = save_i387(fpstate); if (tmp < 0) - err = 1; - else - err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); + return 1; + sc.fpstate = tmp ? fpstate : NULL; /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); + sc.oldmask = mask; + sc.cr2 = current->thread.cr2; - return err; + if (copy_to_user(__sc, &sc, sizeof(sc))) + return 1; + return 0; } /* @@ -443,7 +441,7 @@ static void setup_rt_frame(int sig, stru /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(current->sas_ss_sp, (unsigned long *)&frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->esp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); --- linux-2.6.3/arch/i386/kernel/smpboot.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/kernel/smpboot.c 2004-02-17 22:26:01.000000000 -0800 @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -934,7 +935,7 @@ static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; -int cpu_sibling_map[NR_CPUS] __cacheline_aligned; +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; static void __init smp_boot_cpus(unsigned int max_cpus) { @@ -948,10 +949,13 @@ static void __init smp_boot_cpus(unsigne printk("CPU%d: ", 0); print_cpu_info(&cpu_data[0]); + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); boot_cpu_logical_apicid = logical_smp_processor_id(); current_thread_info()->cpu = 0; smp_tune_scheduling(); + cpus_clear(cpu_sibling_map[0]); + cpu_set(0, cpu_sibling_map[0]); /* * If we couldn't find an SMP configuration at boot time, @@ -1008,8 +1012,6 @@ static void __init smp_boot_cpus(unsigne setup_local_APIC(); map_cpu_to_logical_apicid(); - if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) - BUG(); setup_portio_remap(); @@ -1080,32 +1082,34 @@ static void __init smp_boot_cpus(unsigne Dprintk("Boot done.\n"); /* - * If Hyper-Threading is avaialble, construct cpu_sibling_map[], so - * that we can tell the sibling CPU efficiently. + * construct cpu_sibling_map[], so that we can tell sibling CPUs + * efficiently. */ - if (cpu_has_ht && smp_num_siblings > 1) { - for (cpu = 0; cpu < NR_CPUS; cpu++) - cpu_sibling_map[cpu] = NO_PROC_ID; - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - int i; - if (!cpu_isset(cpu, cpu_callout_map)) - continue; + for (cpu = 0; cpu < NR_CPUS; cpu++) + cpus_clear(cpu_sibling_map[cpu]); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + int siblings = 0; + int i; + if (!cpu_isset(cpu, cpu_callout_map)) + continue; + if (smp_num_siblings > 1) { for (i = 0; i < NR_CPUS; i++) { - if (i == cpu || !cpu_isset(i, cpu_callout_map)) + if (!cpu_isset(i, cpu_callout_map)) continue; if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_sibling_map[cpu] = i; - printk("cpu_sibling_map[%d] = %d\n", cpu, cpu_sibling_map[cpu]); - break; + siblings++; + cpu_set(i, cpu_sibling_map[cpu]); } } - if (cpu_sibling_map[cpu] == NO_PROC_ID) { - smp_num_siblings = 1; - printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu); - } + } else { + siblings++; + cpu_set(cpu, cpu_sibling_map[cpu]); } + + if (siblings != smp_num_siblings) + printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); } smpboot_setup_io_apic(); @@ -1119,6 +1123,207 @@ static void __init smp_boot_cpus(unsigne synchronize_tsc_bp(); } +#ifdef CONFIG_SCHED_SMT +#ifdef CONFIG_NUMA +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static struct sched_group sched_group_nodes[MAX_NUMNODES]; +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static DEFINE_PER_CPU(struct sched_domain, node_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first_cpu = NULL, *last_cpu = NULL; + + /* Set up domains */ + for_each_cpu_mask(i, cpu_online_map) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_domain *node_domain = &per_cpu(node_domains, i); + int node = cpu_to_node(i); + cpumask_t nodemask = node_to_cpumask(node); + + *cpu_domain = SD_SIBLING_INIT; + cpu_domain->span = cpu_sibling_map[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = nodemask; + phys_domain->flags |= SD_FLAG_IDLE; + + *node_domain = SD_NODE_INIT; + node_domain->span = cpu_online_map; + } + + /* Set up CPU (sibling) groups */ + for_each_cpu_mask(i, cpu_online_map) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + int j; + first_cpu = last_cpu = NULL; + + if (i != first_cpu(cpu_domain->span)) + continue; + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpu->cpumask = CPU_MASK_NONE; + cpu_set(j, cpu->cpumask); + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + } + + for (i = 0; i < MAX_NUMNODES; i++) { + int j; + cpumask_t nodemask; + cpus_and(nodemask, node_to_cpumask(i), cpu_online_map); + + if (cpus_empty(nodemask)) + continue; + + first_cpu = last_cpu = NULL; + /* Set up physical groups */ + for_each_cpu_mask(j, nodemask) { + struct sched_domain *cpu_domain = cpu_sched_domain(j); + struct sched_group *cpu = &sched_group_phys[j]; + + if (j != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + } + + /* Set up nodes */ + first_cpu = last_cpu = NULL; + for (i = 0; i < MAX_NUMNODES; i++) { + struct sched_group *cpu = &sched_group_nodes[i]; + cpumask_t nodemask; + cpus_and(nodemask, node_to_cpumask(i), cpu_online_map); + + if (cpus_empty(nodemask)) + continue; + + cpu->cpumask = nodemask; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + + + mb(); + for_each_cpu_mask(i, cpu_online_map) { + int node = cpu_to_node(i); + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_domain *node_domain = &per_cpu(node_domains, i); + struct sched_group *cpu_group = &sched_group_cpus[i]; + struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)]; + struct sched_group *node_group = &sched_group_nodes[node]; + + cpu_domain->parent = phys_domain; + phys_domain->parent = node_domain; + + node_domain->groups = node_group; + phys_domain->groups = phys_group; + cpu_domain->groups = cpu_group; + } +} +#else /* CONFIG_NUMA */ +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first_cpu = NULL, *last_cpu = NULL; + + /* Set up domains */ + for_each_cpu_mask(i, cpu_online_map) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + + *cpu_domain = SD_SIBLING_INIT; + cpu_domain->span = cpu_sibling_map[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = cpu_online_map; + phys_domain->flags |= SD_FLAG_IDLE; + } + + /* Set up CPU (sibling) groups */ + for_each_cpu_mask(i, cpu_online_map) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + int j; + first_cpu = last_cpu = NULL; + + if (i != first_cpu(cpu_domain->span)) + continue; + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpus_clear(cpu->cpumask); + cpu_set(j, cpu->cpumask); + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + } + + first_cpu = last_cpu = NULL; + /* Set up physical groups */ + for_each_cpu_mask(i, cpu_online_map) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_group *cpu = &sched_group_phys[i]; + + if (i != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + + mb(); + for_each_cpu_mask(i, cpu_online_map) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_group *cpu_group = &sched_group_cpus[i]; + struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)]; + cpu_domain->parent = phys_domain; + phys_domain->groups = phys_group; + cpu_domain->groups = cpu_group; + } +} +#endif /* CONFIG_NUMA */ +#endif /* CONFIG_SCHED_SMT */ + /* These are wrappers to interface to the new boot process. Someone who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ void __init smp_prepare_cpus(unsigned int max_cpus) --- linux-2.6.3/arch/i386/kernel/smp.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/smp.c 2004-02-17 22:26:07.000000000 -0800 @@ -327,10 +327,12 @@ asmlinkage void smp_invalidate_interrupt if (flush_mm == cpu_tlbstate[cpu].active_mm) { if (cpu_tlbstate[cpu].state == TLBSTATE_OK) { +#ifndef CONFIG_X86_SWITCH_PAGETABLES if (flush_va == FLUSH_ALL) local_flush_tlb(); else __flush_tlb_one(flush_va); +#endif } else leave_mm(cpu); } @@ -396,21 +398,6 @@ static void flush_tlb_others(cpumask_t c spin_unlock(&tlbstate_lock); } -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); - preempt_enable(); -} - void flush_tlb_mm (struct mm_struct * mm) { cpumask_t cpu_mask; @@ -442,7 +429,10 @@ void flush_tlb_page(struct vm_area_struc if (current->active_mm == mm) { if(current->mm) - __flush_tlb_one(va); +#ifndef CONFIG_X86_SWITCH_PAGETABLES + __flush_tlb_one(va) +#endif + ; else leave_mm(smp_processor_id()); } @@ -466,7 +456,17 @@ void flush_tlb_all(void) { on_each_cpu(do_flush_tlb_all, 0, 1, 1); } - +#ifdef CONFIG_KGDB +/* + * By using the NMI code instead of a vector we just sneak thru the + * word generator coming out with just what we want. AND it does + * not matter if clustered_apic_mode is set or not. + */ +void smp_send_nmi_allbutself(void) +{ + send_IPI_allbutself(APIC_DM_NMI); +} +#endif /* * this function sends a 'reschedule' IPI to another CPU. * it goes straight through and wastes no time serializing --- linux-2.6.3/arch/i386/kernel/sysenter.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/sysenter.c 2004-02-17 22:26:07.000000000 -0800 @@ -18,13 +18,18 @@ #include #include #include +#include extern asmlinkage void sysenter_entry(void); void enable_sep_cpu(void *info) { int cpu = get_cpu(); +#ifdef CONFIG_X86_HIGH_ENTRY + struct tss_struct *tss = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu; +#else struct tss_struct *tss = init_tss + cpu; +#endif tss->ss1 = __KERNEL_CS; tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; --- linux-2.6.3/arch/i386/kernel/sys_i386.c 2003-06-14 12:18:35.000000000 -0700 +++ 25/arch/i386/kernel/sys_i386.c 2004-02-17 22:25:25.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -106,8 +107,6 @@ out: } -extern asmlinkage int sys_select(int, fd_set __user *, fd_set __user *, fd_set __user *, struct timeval __user *); - struct sel_arg_struct { unsigned long n; fd_set __user *inp, *outp, *exp; --- linux-2.6.3/arch/i386/kernel/timers/common.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/timers/common.c 2004-02-17 22:08:57.000000000 -0800 @@ -137,3 +137,23 @@ bad_calibration: } #endif +/* calculate cpu_khz */ +void __init init_cpu_khz(void) +{ + if (cpu_has_tsc) { + unsigned long tsc_quotient = calibrate_tsc(); + if (tsc_quotient) { + /* report CPU clock rate in Hz. + * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = + * clock/second. Our precision is about 100 ppm. + */ + { unsigned long eax=0, edx=1000; + __asm__("divl %2" + :"=a" (cpu_khz), "=d" (edx) + :"r" (tsc_quotient), + "0" (eax), "1" (edx)); + printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + } + } + } +} --- linux-2.6.3/arch/i386/kernel/timers/Makefile 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/timers/Makefile 2004-02-17 22:08:57.000000000 -0800 @@ -6,3 +6,4 @@ obj-y := timer.o timer_none.o timer_tsc. obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o obj-$(CONFIG_HPET_TIMER) += timer_hpet.o +obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o --- linux-2.6.3/arch/i386/kernel/timers/timer.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/i386/kernel/timers/timer.c 2004-02-17 22:08:57.000000000 -0800 @@ -19,6 +19,9 @@ static struct timer_opts* timers[] = { #ifdef CONFIG_HPET_TIMER &timer_hpet, #endif +#ifdef CONFIG_X86_PM_TIMER + &timer_pmtmr, +#endif &timer_tsc, &timer_pit, NULL, --- linux-2.6.3/arch/i386/kernel/timers/timer_cyclone.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/timers/timer_cyclone.c 2004-02-17 22:08:57.000000000 -0800 @@ -212,26 +212,7 @@ static int __init init_cyclone(char* ove } } - /* init cpu_khz. - * XXX - This should really be done elsewhere, - * and in a more generic fashion. -johnstul@us.ibm.com - */ - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc(); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. - */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); - } - } - } + init_cpu_khz(); /* Everything looks good! */ return 0; --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/timers/timer_pm.c 2004-02-17 22:08:58.000000000 -0800 @@ -0,0 +1,217 @@ +/* + * (C) Dominik Brodowski 2003 + * + * Driver to use the Power Management Timer (PMTMR) available in some + * southbridges as primary timing source for the Linux kernel. + * + * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, + * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. + * + * This file is licensed under the GPL v2. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* The I/O port the PMTMR resides at. + * The location is detected during setup_arch(), + * in arch/i386/acpi/boot.c */ +u32 pmtmr_ioport = 0; + + +/* value of the Power timer at last timer interrupt */ +static u32 offset_tick; +static u32 offset_delay; + +static unsigned long long monotonic_base; +static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; + +#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ + +/*helper function to safely read acpi pm timesource*/ +static inline u32 read_pmtmr(void) +{ + u32 v1=0,v2=0,v3=0; + /* It has been reported that because of various broken + * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time + * source is not latched, so you must read it multiple + * times to insure a safe value is read. + */ + do { + v1 = inl(pmtmr_ioport); + v2 = inl(pmtmr_ioport); + v3 = inl(pmtmr_ioport); + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + /* mask the output to 24 bits */ + return v2 & ACPI_PM_MASK; +} + +static int init_pmtmr(char* override) +{ + u32 value1, value2; + unsigned int i; + + if (override[0] && strncmp(override,"pmtmr",5)) + return -ENODEV; + + if (!pmtmr_ioport) + return -ENODEV; + + /* we use the TSC for delay_pmtmr, so make sure it exists */ + if (!cpu_has_tsc) + return -ENODEV; + + /* "verify" this timing source */ + value1 = read_pmtmr(); + for (i = 0; i < 10000; i++) { + value2 = read_pmtmr(); + if (value2 == value1) + continue; + if (value2 > value1) + goto pm_good; + if ((value2 < value1) && ((value2) < 0xFFF)) + goto pm_good; + printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); + return -EINVAL; + } + printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); + return -ENODEV; + +pm_good: + init_cpu_khz(); + return 0; +} + +static inline u32 cyc2us(u32 cycles) +{ + /* The Power Management Timer ticks at 3.579545 ticks per microsecond. + * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] + * + * Even with HZ = 100, delta is at maximum 35796 ticks, so it can + * easily be multiplied with 286 (=0x11E) without having to fear + * u32 overflows. + */ + cycles *= 286; + return (cycles >> 10); +} + +/* + * this gets called during each timer interrupt + * - Called while holding the writer xtime_lock + */ +static void mark_offset_pmtmr(void) +{ + u32 lost, delta, last_offset; + static int first_run = 1; + last_offset = offset_tick; + + write_seqlock(&monotonic_lock); + + offset_tick = read_pmtmr(); + + /* calculate tick interval */ + delta = (offset_tick - last_offset) & ACPI_PM_MASK; + + /* convert to usecs */ + delta = cyc2us(delta); + + /* update the monotonic base value */ + monotonic_base += delta * NSEC_PER_USEC; + write_sequnlock(&monotonic_lock); + + /* convert to ticks */ + delta += offset_delay; + lost = delta / (USEC_PER_SEC / HZ); + offset_delay = delta % (USEC_PER_SEC / HZ); + + + /* compensate for lost ticks */ + if (lost >= 2) + jiffies_64 += lost - 1; + + /* don't calculate delay for first run, + or if we've got less then a tick */ + if (first_run || (lost < 1)) { + first_run = 0; + offset_delay = 0; + } +} + + +static unsigned long long monotonic_clock_pmtmr(void) +{ + u32 last_offset, this_offset; + unsigned long long base, ret; + unsigned seq; + + + /* atomically read monotonic base & last_offset */ + do { + seq = read_seqbegin(&monotonic_lock); + last_offset = offset_tick; + base = monotonic_base; + } while (read_seqretry(&monotonic_lock, seq)); + + /* Read the pmtmr */ + this_offset = read_pmtmr(); + + /* convert to nanoseconds */ + ret = (this_offset - last_offset) & ACPI_PM_MASK; + ret = base + (cyc2us(ret) * NSEC_PER_USEC); + return ret; +} + +static void delay_pmtmr(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do + { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + + +/* + * get the offset (in microseconds) from the last call to mark_offset() + * - Called holding a reader xtime_lock + */ +static unsigned long get_offset_pmtmr(void) +{ + u32 now, offset, delta = 0; + + offset = offset_tick; + now = read_pmtmr(); + delta = (now - offset)&ACPI_PM_MASK; + + return (unsigned long) offset_delay + cyc2us(delta); +} + + +/* acpi timer_opts struct */ +struct timer_opts timer_pmtmr = { + .name = "pmtmr", + .init = init_pmtmr, + .mark_offset = mark_offset_pmtmr, + .get_offset = get_offset_pmtmr, + .monotonic_clock = monotonic_clock_pmtmr, + .delay = delay_pmtmr, +}; + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dominik Brodowski "); +MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); --- linux-2.6.3/arch/i386/kernel/traps.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/traps.c 2004-02-17 22:26:07.000000000 -0800 @@ -54,12 +54,8 @@ #include "mach_traps.h" -asmlinkage int system_call(void); -asmlinkage void lcall7(void); -asmlinkage void lcall27(void); - -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 } }; +struct desc_struct default_ldt[] __attribute__((__section__(".data.default_ldt"))) = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }; +struct page *default_ldt_page; /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq = 0; @@ -91,6 +87,43 @@ asmlinkage void alignment_check(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); +#ifdef CONFIG_KGDB +extern void sysenter_entry(void); +#include +#include +extern void int3(void); +extern void debug(void); +void set_intr_gate(unsigned int n, void *addr); +static void set_intr_usr_gate(unsigned int n, void *addr); +/* + * Should be able to call this breakpoint() very early in + * bring up. Just hard code the call where needed. + * The breakpoint() code is here because set_?_gate() functions + * are local (static) to trap.c. They need be done only once, + * but it does not hurt to do them over. + */ +void breakpoint(void) +{ + init_entry_mappings(); + set_intr_usr_gate(3,&int3); /* disable ints on trap */ + set_intr_gate(1,&debug); + set_intr_gate(14,&page_fault); + + BREAKPOINT; +} +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ + { \ + if (!user_mode(regs) ) \ + { \ + kgdb_handle_exception(trapnr, signr, error_code, regs); \ + after; \ + } else if ((trapnr == 3) && (regs->eflags &0x200)) local_irq_enable(); \ + } +#else +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) +#endif + + static int kstack_depth_to_print = 24; void show_trace(struct task_struct *task, unsigned long * stack) @@ -119,7 +152,7 @@ void show_trace_task(struct task_struct unsigned long esp = tsk->thread.esp; /* User space on another CPU? */ - if ((esp ^ (unsigned long)tsk->thread_info) & (PAGE_MASK<<1)) + if ((esp ^ (unsigned long)tsk->thread_info) & ~(THREAD_SIZE - 1)) return; show_trace(tsk, (unsigned long *)esp); } @@ -175,8 +208,9 @@ void show_registers(struct pt_regs *regs ss = regs->xss & 0xffff; } print_modules(); - printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx\n", - smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags); + printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx\n", + smp_processor_id(), 0xffff & regs->xcs, + regs->eip, print_tainted(), regs->eflags); print_symbol("EIP is at %s\n", regs->eip); printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", @@ -192,23 +226,27 @@ void show_registers(struct pt_regs *regs * time of the fault.. */ if (in_kernel) { + u8 *eip; printk("\nStack: "); show_stack(NULL, (unsigned long*)esp); printk("Code: "); - if(regs->eip < PAGE_OFFSET) - goto bad; - for(i=0;i<20;i++) - { - unsigned char c; - if(__get_user(c, &((unsigned char*)regs->eip)[i])) { -bad: + eip = (u8 *)regs->eip - 43; + for (i = 0; i < 64; i++, eip++) { + unsigned char c = 0xff; + + if ((user_mode(regs) && get_user(c, eip)) || + (!user_mode(regs) && __direct_get_user(c, eip))) { + printk(" Bad EIP value."); break; } - printk("%02x ", c); + if (eip == (u8 *)regs->eip) + printk("<%02x> ", c); + else + printk("%02x ", c); } } printk("\n"); @@ -255,12 +293,36 @@ spinlock_t die_lock = SPIN_LOCK_UNLOCKED void die(const char * str, struct pt_regs * regs, long err) { static int die_counter; + int nl = 0; console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); handle_BUG(regs); printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); + nl = 1; +#endif +#ifdef CONFIG_SMP + printk("SMP "); + nl = 1; +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); + nl = 1; +#endif + if (nl) + printk("\n"); +#ifdef CONFIG_KGDB + /* This is about the only place we want to go to kgdb even if in + * user mode. But we must go in via a trap so within kgdb we will + * always be in kernel mode. + */ + if (user_mode(regs)) + BREAKPOINT; +#endif + CHK_REMOTE_DEBUG(0,SIGTRAP,err,regs,) show_registers(regs); bust_spinlocks(0); spin_unlock_irq(&die_lock); @@ -330,6 +392,7 @@ static inline void do_trap(int trapnr, i #define DO_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,)\ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } @@ -347,7 +410,9 @@ asmlinkage void do_##name(struct pt_regs #define DO_VM86_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + CHK_REMOTE_DEBUG(trapnr, signr, error_code,regs, return)\ do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ + return; \ } #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ @@ -394,8 +459,10 @@ gp_in_vm86: return; gp_in_kernel: - if (!fixup_exception(regs)) + if (!fixup_exception(regs)){ + CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,) die("general protection fault", regs, error_code); + } } static void mem_parity_error(unsigned char reason, struct pt_regs * regs) @@ -534,10 +601,18 @@ asmlinkage void do_debug(struct pt_regs if (regs->eflags & X86_EFLAGS_IF) local_irq_enable(); - /* Mask out spurious debug traps due to lazy DR7 setting */ + /* + * Mask out spurious debug traps due to lazy DR7 setting or + * due to 4G/4G kernel mode: + */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { if (!tsk->thread.debugreg[7]) goto clear_dr7; + if (!user_mode(regs)) { + // restore upon return-to-userspace: + set_thread_flag(TIF_DB7); + goto clear_dr7; + } } if (regs->eflags & VM_MASK) @@ -557,8 +632,18 @@ asmlinkage void do_debug(struct pt_regs * allowing programs to debug themselves without the ptrace() * interface. */ +#ifdef CONFIG_KGDB + /* + * I think this is the only "real" case of a TF in the kernel + * that really belongs to user space. Others are + * "Ours all ours!" + */ + if (((regs->xcs & 3) == 0) && ((void *)regs->eip == sysenter_entry)) + goto clear_TF_reenable; +#else if ((regs->xcs & 3) == 0) goto clear_TF_reenable; +#endif if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) goto clear_TF; } @@ -570,6 +655,17 @@ asmlinkage void do_debug(struct pt_regs info.si_errno = 0; info.si_code = TRAP_BRKPT; +#ifdef CONFIG_KGDB + /* + * If this is a kernel mode trap, we need to reset db7 to allow us + * to continue sanely ALSO skip the signal delivery + */ + if ((regs->xcs & 3) == 0) + goto clear_dr7; + + /* if not kernel, allow ints but only if they were on */ + if ( regs->eflags & 0x200) local_irq_enable(); +#endif /* If this is a kernel mode trap, save the user PC on entry to * the kernel, that's what the debugger can make sense of. */ @@ -584,6 +680,7 @@ clear_dr7: __asm__("movl %0,%%db7" : /* no output */ : "r" (0)); + CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,) return; debug_vm86: @@ -779,19 +876,53 @@ asmlinkage void math_emulate(long arg) #endif /* CONFIG_MATH_EMULATION */ -#ifdef CONFIG_X86_F00F_BUG -void __init trap_init_f00f_bug(void) +void __init trap_init_virtual_IDT(void) { - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. + * "idt" is magic - it overlaps the idt_descr + * variable so that updating idt will automatically + * update the idt descriptor.. */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); + __set_fixmap(FIX_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + idt_descr.address = __fix_to_virt(FIX_IDT); + __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); } + +void __init trap_init_virtual_GDT(void) +{ + int cpu = smp_processor_id(); + struct Xgt_desc_struct *gdt_desc = cpu_gdt_descr + cpu; + struct Xgt_desc_struct tmp_desc = {0, 0}; + struct tss_struct * t; + + __asm__ __volatile__("sgdt %0": "=m" (tmp_desc): :"memory"); + +#ifdef CONFIG_X86_HIGH_ENTRY + if (!cpu) { + __set_fixmap(FIX_GDT_0, __pa(cpu_gdt_table), PAGE_KERNEL); + __set_fixmap(FIX_GDT_1, __pa(cpu_gdt_table) + PAGE_SIZE, PAGE_KERNEL); + __set_fixmap(FIX_TSS_0, __pa(init_tss), PAGE_KERNEL); + __set_fixmap(FIX_TSS_1, __pa(init_tss) + 1*PAGE_SIZE, PAGE_KERNEL); + __set_fixmap(FIX_TSS_2, __pa(init_tss) + 2*PAGE_SIZE, PAGE_KERNEL); + __set_fixmap(FIX_TSS_3, __pa(init_tss) + 3*PAGE_SIZE, PAGE_KERNEL); + } + + gdt_desc->address = __fix_to_virt(FIX_GDT_0) + sizeof(cpu_gdt_table[0]) * cpu; +#else + gdt_desc->address = (unsigned long)cpu_gdt_table[cpu]; +#endif + __asm__ __volatile__("lgdt %0": "=m" (*gdt_desc)); + +#ifdef CONFIG_X86_HIGH_ENTRY + t = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu; +#else + t = init_tss + cpu; #endif + set_tss_desc(cpu, t); + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; + load_TR_desc(); +} #define _set_gate(gate_addr,type,dpl,addr,seg) \ do { \ @@ -818,20 +949,26 @@ void set_intr_gate(unsigned int n, void _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); } -static void __init set_trap_gate(unsigned int n, void *addr) +void __init set_trap_gate(unsigned int n, void *addr) { _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); } -static void __init set_system_gate(unsigned int n, void *addr) +void __init set_system_gate(unsigned int n, void *addr) { _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); } -static void __init set_call_gate(void *a, void *addr) +void __init set_call_gate(void *a, void *addr) { _set_gate(a,12,3,addr,__KERNEL_CS); } +#ifdef CONFIG_KGDB +void set_intr_usr_gate(unsigned int n, void *addr) +{ + _set_gate(idt_table+n,14,3,addr,__KERNEL_CS); +} +#endif static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) { @@ -850,11 +987,16 @@ void __init trap_init(void) #ifdef CONFIG_X86_LOCAL_APIC init_apic_mappings(); #endif + init_entry_mappings(); set_trap_gate(0,÷_error); set_intr_gate(1,&debug); set_intr_gate(2,&nmi); +#ifndef CONFIG_KGDB set_system_gate(3,&int3); /* int3-5 can be called from all */ +#else + set_intr_usr_gate(3,&int3); /* int3-5 can be called from all */ +#endif set_system_gate(4,&overflow); set_system_gate(5,&bounds); set_trap_gate(6,&invalid_op); --- linux-2.6.3/arch/i386/kernel/vm86.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/vm86.c 2004-02-17 22:26:07.000000000 -0800 @@ -125,7 +125,7 @@ struct pt_regs * save_v86_state(struct k tss = init_tss + get_cpu(); current->thread.esp0 = current->thread.saved_esp0; current->thread.sysenter_cs = __KERNEL_CS; - load_esp0(tss, ¤t->thread); + load_virtual_esp0(tss, current); current->thread.saved_esp0 = 0; put_cpu(); @@ -305,7 +305,7 @@ static void do_sys_vm86(struct kernel_vm tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; - load_esp0(tss, &tsk->thread); + load_virtual_esp0(tss, tsk); put_cpu(); tsk->thread.screen_bitmap = info->screen_bitmap; --- linux-2.6.3/arch/i386/kernel/vmlinux.lds.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/vmlinux.lds.S 2004-02-17 22:26:07.000000000 -0800 @@ -3,6 +3,9 @@ */ #include +#include +#include +#include OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -10,7 +13,7 @@ ENTRY(startup_32) jiffies = jiffies_64; SECTIONS { - . = 0xC0000000 + 0x100000; + . = __PAGE_OFFSET + 0x100000; /* read-only */ _text = .; /* Text and read-only data */ .text : { @@ -19,6 +22,19 @@ SECTIONS *(.gnu.warning) } = 0x9090 +#ifdef CONFIG_X86_4G + . = ALIGN(PAGE_SIZE_asm); + __entry_tramp_start = .; + . = FIX_ENTRY_TRAMPOLINE_0_addr; + __start___entry_text = .; + .entry.text : AT (__entry_tramp_start) { *(.entry.text) } + __entry_tramp_end = __entry_tramp_start + SIZEOF(.entry.text); + . = __entry_tramp_end; + . = ALIGN(PAGE_SIZE_asm); +#else + .entry.text : { *(.entry.text) } +#endif + _etext = .; /* End of text section */ . = ALIGN(16); /* Exception table */ @@ -34,15 +50,12 @@ SECTIONS CONSTRUCTORS } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE_asm); __nosave_begin = .; .data_nosave : { *(.data.nosave) } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE_asm); __nosave_end = .; - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - . = ALIGN(32); .data.cacheline_aligned : { *(.data.cacheline_aligned) } @@ -52,7 +65,7 @@ SECTIONS .data.init_task : { *(.data.init_task) } /* will be freed after init */ - . = ALIGN(4096); /* Init code and data */ + . = ALIGN(PAGE_SIZE_asm); /* Init code and data */ __init_begin = .; .init.text : { _sinittext = .; @@ -91,7 +104,7 @@ SECTIONS from .altinstructions and .eh_frame */ .exit.text : { *(.exit.text) } .exit.data : { *(.exit.data) } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE_asm); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; @@ -99,10 +112,22 @@ SECTIONS __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE_asm); __init_end = .; /* freed after init ends here */ - + + . = ALIGN(PAGE_SIZE_asm); + .data.page_aligned_tss : { *(.data.tss) } + + . = ALIGN(PAGE_SIZE_asm); + .data.page_aligned_default_ldt : { *(.data.default_ldt) } + + . = ALIGN(PAGE_SIZE_asm); + .data.page_aligned_idt : { *(.data.idt) } + + . = ALIGN(PAGE_SIZE_asm); + .data.page_aligned_gdt : { *(.data.gdt) } + __bss_start = .; /* BSS */ .bss : { *(.bss) } __bss_stop = .; @@ -122,4 +147,6 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + } --- linux-2.6.3/arch/i386/kernel/vsyscall.lds 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/vsyscall.lds 2004-02-17 22:26:07.000000000 -0800 @@ -5,7 +5,7 @@ */ /* This must match . */ -VSYSCALL_BASE = 0xffffe000; +VSYSCALL_BASE = 0xffffd000; SECTIONS { --- linux-2.6.3/arch/i386/kernel/vsyscall-sysenter.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/vsyscall-sysenter.S 2004-02-17 22:26:07.000000000 -0800 @@ -7,6 +7,11 @@ .type __kernel_vsyscall,@function __kernel_vsyscall: .LSTART_vsyscall: + cmpl $192, %eax + jne 1f + int $0x80 + ret +1: push %ecx .Lpush_ecx: push %edx --- linux-2.6.3/arch/i386/lib/checksum.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/checksum.S 2004-02-17 22:26:07.000000000 -0800 @@ -280,14 +280,14 @@ unsigned int csum_partial_copy_generic ( .previous .align 4 -.globl csum_partial_copy_generic +.globl direct_csum_partial_copy_generic #ifndef CONFIG_X86_USE_PPRO_CHECKSUM #define ARGBASE 16 #define FP 12 -csum_partial_copy_generic: +direct_csum_partial_copy_generic: subl $4,%esp pushl %edi pushl %esi @@ -422,7 +422,7 @@ DST( movb %cl, (%edi) ) #define ARGBASE 12 -csum_partial_copy_generic: +direct_csum_partial_copy_generic: pushl %ebx pushl %edi pushl %esi --- linux-2.6.3/arch/i386/lib/dec_and_lock.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/dec_and_lock.c 2004-02-17 22:26:05.000000000 -0800 @@ -10,6 +10,7 @@ #include #include +#ifndef ATOMIC_DEC_AND_LOCK int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) { int counter; @@ -38,3 +39,5 @@ slow_path: spin_unlock(lock); return 0; } +#endif + --- linux-2.6.3/arch/i386/lib/getuser.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/getuser.S 2004-02-17 22:26:07.000000000 -0800 @@ -9,6 +9,7 @@ * return value. */ #include +#include /* @@ -28,7 +29,7 @@ .globl __get_user_1 __get_user_1: GET_THREAD_INFO(%edx) - cmpl TI_ADDR_LIMIT(%edx),%eax + cmpl TI_addr_limit(%edx),%eax jae bad_get_user 1: movzbl (%eax),%edx xorl %eax,%eax @@ -40,7 +41,7 @@ __get_user_2: addl $1,%eax jc bad_get_user GET_THREAD_INFO(%edx) - cmpl TI_ADDR_LIMIT(%edx),%eax + cmpl TI_addr_limit(%edx),%eax jae bad_get_user 2: movzwl -1(%eax),%edx xorl %eax,%eax @@ -52,7 +53,7 @@ __get_user_4: addl $3,%eax jc bad_get_user GET_THREAD_INFO(%edx) - cmpl TI_ADDR_LIMIT(%edx),%eax + cmpl TI_addr_limit(%edx),%eax jae bad_get_user 3: movl -3(%eax),%edx xorl %eax,%eax --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/lib/kgdb_serial.c 2004-02-17 22:08:44.000000000 -0800 @@ -0,0 +1,499 @@ +/* + * Serial interface GDB stub + * + * Written (hacked together) by David Grothe (dave@gcom.com) + * Modified to allow invokation early in boot see also + * kgdb.h for instructions by George Anzinger(george@mvista.com) + * Modified to handle debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_KGDB_USER_CONSOLE +extern void kgdb_console_finit(void); +#endif +#define PRNT_off +#define TEST_EXISTANCE +#ifdef PRNT +#define dbprintk(s) printk s +#else +#define dbprintk(s) +#endif +#define TEST_INTERRUPT_off +#ifdef TEST_INTERRUPT +#define intprintk(s) printk s +#else +#define intprintk(s) +#endif + +#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) + +#define GDB_BUF_SIZE 512 /* power of 2, please */ + +static char gdb_buf[GDB_BUF_SIZE]; +static int gdb_buf_in_inx; +static atomic_t gdb_buf_in_cnt; +static int gdb_buf_out_inx; + +struct async_struct *gdb_async_info; +static int gdb_async_irq; + +#define outb_px(a,b) outb_p(b,a) + +static void program_uart(struct async_struct *info); +static void write_char(struct async_struct *info, int chr); +/* + * Get a byte from the hardware data buffer and return it + */ +static int +read_data_bfr(struct async_struct *info) +{ + char it = inb_p(info->port + UART_LSR); + + if (it & UART_LSR_DR) + return (inb_p(info->port + UART_RX)); + /* + * If we have a framing error assume somebody messed with + * our uart. Reprogram it and send '-' both ways... + */ + if (it & 0xc) { + program_uart(info); + write_char(info, '-'); + return ('-'); + } + return (-1); + +} /* read_data_bfr */ + +/* + * Get a char if available, return -1 if nothing available. + * Empty the receive buffer first, then look at the interface hardware. + + * Locking here is a bit of a problem. We MUST not lock out communication + * if we are trying to talk to gdb about a kgdb entry. ON the other hand + * we can loose chars in the console pass thru if we don't lock. It is also + * possible that we could hold the lock or be waiting for it when kgdb + * NEEDS to talk. Since kgdb locks down the world, it does not need locks. + * We do, of course have possible issues with interrupting a uart operation, + * but we will just depend on the uart status to help keep that straight. + + */ +static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; +#ifdef CONFIG_SMP +extern spinlock_t kgdb_spinlock; +#endif + +static int +read_char(struct async_struct *info) +{ + int chr; + unsigned long flags; + local_irq_save(flags); +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_lock(&uart_interrupt_lock); + } +#endif + if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ + chr = gdb_buf[gdb_buf_out_inx++]; + gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); + atomic_dec(&gdb_buf_in_cnt); + } else { + chr = read_data_bfr(info); + } +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_unlock(&uart_interrupt_lock); + } +#endif + local_irq_restore(flags); + return (chr); +} + +/* + * Wait until the interface can accept a char, then write it. + */ +static void +write_char(struct async_struct *info, int chr) +{ + while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; + + outb_p(chr, info->port + UART_TX); + +} /* write_char */ + +/* + * Mostly we don't need a spinlock, but since the console goes + * thru here with interrutps on, well, we need to catch those + * chars. + */ +/* + * This is the receiver interrupt routine for the GDB stub. + * It will receive a limited number of characters of input + * from the gdb host machine and save them up in a buffer. + * + * When the gdb stub routine tty_getDebugChar() is called it + * draws characters out of the buffer until it is empty and + * then reads directly from the serial port. + * + * We do not attempt to write chars from the interrupt routine + * since the stubs do all of that via tty_putDebugChar() which + * writes one byte after waiting for the interface to become + * ready. + * + * The debug stubs like to run with interrupts disabled since, + * after all, they run as a consequence of a breakpoint in + * the kernel. + * + * Perhaps someone who knows more about the tty driver than I + * care to learn can make this work for any low level serial + * driver. + */ +static irqreturn_t +gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct async_struct *info; + unsigned long flags; + + info = gdb_async_info; + if (!info || !info->tty || irq != gdb_async_irq) + return IRQ_NONE; + + local_irq_save(flags); + spin_lock(&uart_interrupt_lock); + do { + int chr = read_data_bfr(info); + intprintk(("Debug char on int: %x hex\n", chr)); + if (chr < 0) + continue; + + if (chr == 3) { /* Ctrl-C means remote interrupt */ + BREAKPOINT; + continue; + } + + if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { + /* buffer overflow tosses early char */ + read_char(info); + } + gdb_buf[gdb_buf_in_inx++] = chr; + gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); + } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); + spin_unlock(&uart_interrupt_lock); + local_irq_restore(flags); + return IRQ_HANDLED; +} /* gdb_interrupt */ + +/* + * Just a NULL routine for testing. + */ +void +gdb_null(void) +{ +} /* gdb_null */ + +/* These structure are filled in with values defined in asm/kgdb_local.h + */ +static struct serial_state state = SB_STATE; +static struct async_struct local_info = SB_INFO; +static int ok_to_enable_ints = 0; +static void kgdb_enable_ints_now(void); + +extern char *kgdb_version; +/* + * Hook an IRQ for KGDB. + * + * This routine is called from tty_putDebugChar, below. + */ +static int ints_disabled = 1; +int +gdb_hook_interrupt(struct async_struct *info, int verb) +{ + struct serial_state *state = info->state; + unsigned long flags; + int port; +#ifdef TEST_EXISTANCE + int scratch, scratch2; +#endif + + /* The above fails if memory managment is not set up yet. + * Rather than fail the set up, just keep track of the fact + * and pick up the interrupt thing later. + */ + gdb_async_info = info; + port = gdb_async_info->port; + gdb_async_irq = state->irq; + if (verb) { + printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", + kgdb_version, + port, + gdb_async_irq, gdb_async_info->state->custom_divisor); + } + local_irq_save(flags); +#ifdef TEST_EXISTANCE + /* Existance test */ + /* Should not need all this, but just in case.... */ + + scratch = inb_p(port + UART_IER); + outb_px(port + UART_IER, 0); + outb_px(0xff, 0x080); + scratch2 = inb_p(port + UART_IER); + outb_px(port + UART_IER, scratch); + if (scratch2) { + printk + ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); + local_irq_restore(flags); + return 1; /* We failed; there's nothing here */ + } + scratch2 = inb_p(port + UART_LCR); + outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ + outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ + outb_px(port + UART_LCR, 0); + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); + scratch = inb_p(port + UART_IIR) >> 6; + if (scratch == 1) { + printk("gdb_hook_interrupt: Undefined UART type!" + " Not a UART! \n"); + local_irq_restore(flags); + return 1; + } else { + dbprintk(("gdb_hook_interrupt: UART type " + "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); + } + scratch = inb_p(port + UART_MCR); + outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); + outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); + scratch2 = inb_p(port + UART_MSR) & 0xF0; + outb_px(port + UART_MCR, scratch); + if (scratch2 != 0x90) { + printk("gdb_hook_interrupt: " + "Loop back test failed! Not a UART!\n"); + local_irq_restore(flags); + return scratch2 + 1000; /* force 0 to fail */ + } +#endif /* test existance */ + program_uart(info); + local_irq_restore(flags); + + return (0); + +} /* gdb_hook_interrupt */ + +static void +program_uart(struct async_struct *info) +{ + int port = info->port; + + (void) inb_p(port + UART_RX); + outb_px(port + UART_IER, 0); + + (void) inb_p(port + UART_RX); /* serial driver comments say */ + (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ + (void) inb_p(port + UART_MSR); + outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); + outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ + outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ + outb_px(port + UART_MCR, info->MCR); + + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ + outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + return; +} + +/* + * tty_getDebugChar + * + * This is a GDB stub routine. It waits for a character from the + * serial interface and then returns it. If there is no serial + * interface connection then it returns a bogus value which will + * almost certainly cause the system to hang. In the + */ +int kgdb_in_isr = 0; +int kgdb_in_lsr = 0; +extern spinlock_t kgdb_spinlock; + +/* Caller takes needed protections */ + +int +tty_getDebugChar(void) +{ + volatile int chr, dum, time, end_time; + + dbprintk(("tty_getDebugChar(port %x): ", gdb_async_info->port)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + /* + * This trick says if we wait a very long time and get + * no char, return the -1 and let the upper level deal + * with it. + */ + rdtsc(dum, time); + end_time = time + 2; + while (((chr = read_char(gdb_async_info)) == -1) && + (end_time - time) > 0) { + rdtsc(dum, time); + }; + /* + * This covers our butts if some other code messes with + * our uart, hay, it happens :o) + */ + if (chr == -1) + program_uart(gdb_async_info); + + dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); + return (chr); + +} /* tty_getDebugChar */ + +static int count = 3; +static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; + +static int __init +kgdb_enable_ints(void) +{ + if (kgdboe) { + return 0; + } + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 1); + } + ok_to_enable_ints = 1; + kgdb_enable_ints_now(); +#ifdef CONFIG_KGDB_USER_CONSOLE + kgdb_console_finit(); +#endif + return 0; +} + +#ifdef CONFIG_SERIAL_8250 +void shutdown_for_kgdb(struct async_struct *gdb_async_info); +#endif + +#ifdef CONFIG_DISCONTIGMEM +static inline int kgdb_mem_init_done(void) +{ + return highmem_start_page != NULL; +} +#else +static inline int kgdb_mem_init_done(void) +{ + return max_mapnr != 0; +} +#endif + +static void +kgdb_enable_ints_now(void) +{ + if (!spin_trylock(&one_at_atime)) + return; + if (!ints_disabled) + goto exit; + if (kgdb_mem_init_done() && + ints_disabled) { /* don't try till mem init */ +#ifdef CONFIG_SERIAL_8250 + /* + * The ifdef here allows the system to be configured + * without the serial driver. + * Don't make it a module, however, it will steal the port + */ + shutdown_for_kgdb(gdb_async_info); +#endif + ints_disabled = request_irq(gdb_async_info->state->irq, + gdb_interrupt, + IRQ_T(gdb_async_info), + "KGDB-stub", NULL); + intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); + } + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + exit: + spin_unlock(&one_at_atime); +} + +/* + * tty_putDebugChar + * + * This is a GDB stub routine. It waits until the interface is ready + * to transmit a char and then sends it. If there is no serial + * interface connection then it simply returns to its caller, having + * pretended to send the char. Caller takes needed protections. + */ +void +tty_putDebugChar(int chr) +{ + dbprintk(("tty_putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", + gdb_async_info->port, + chr, + chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + + write_char(gdb_async_info, chr); /* this routine will wait */ + count = (chr == '#') ? 0 : count + 1; + if ((count == 2)) { /* try to enable after */ + if (ints_disabled & ok_to_enable_ints) + kgdb_enable_ints_now(); /* try to enable after */ + + /* We do this a lot because, well we really want to get these + * interrupts. The serial driver will clear these bits when it + * initializes the chip. Every thing else it does is ok, + * but this. + */ + if (!ints_disabled) { + outb_px(gdb_async_info->port + UART_IER, + gdb_async_info->IER); + } + } + +} /* tty_putDebugChar */ + +/* + * This does nothing for the serial port, since it doesn't buffer. + */ + +void tty_flushDebugChar(void) +{ +} + +module_init(kgdb_enable_ints); --- linux-2.6.3/arch/i386/lib/Makefile 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/Makefile 2004-02-17 22:08:43.000000000 -0800 @@ -9,4 +9,5 @@ lib-y = checksum.o delay.o \ lib-$(CONFIG_X86_USE_3DNOW) += mmx.o lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +lib-$(CONFIG_KGDB) += kgdb_serial.o lib-$(CONFIG_DEBUG_IOVIRT) += iodebug.o --- linux-2.6.3/arch/i386/lib/usercopy.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/lib/usercopy.c 2004-02-17 22:26:07.000000000 -0800 @@ -76,7 +76,7 @@ do { \ * and returns @count. */ long -__strncpy_from_user(char *dst, const char __user *src, long count) +__direct_strncpy_from_user(char *dst, const char __user *src, long count) { long res; __do_strncpy_from_user(dst, src, count, res); @@ -102,7 +102,7 @@ __strncpy_from_user(char *dst, const cha * and returns @count. */ long -strncpy_from_user(char *dst, const char __user *src, long count) +direct_strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; if (access_ok(VERIFY_READ, src, 1)) @@ -147,7 +147,7 @@ do { \ * On success, this will be zero. */ unsigned long -clear_user(void __user *to, unsigned long n) +direct_clear_user(void __user *to, unsigned long n) { might_sleep(); if (access_ok(VERIFY_WRITE, to, n)) @@ -167,7 +167,7 @@ clear_user(void __user *to, unsigned lon * On success, this will be zero. */ unsigned long -__clear_user(void __user *to, unsigned long n) +__direct_clear_user(void __user *to, unsigned long n) { __do_clear_user(to, n); return n; @@ -184,7 +184,7 @@ __clear_user(void __user *to, unsigned l * On exception, returns 0. * If the string is too long, returns a value greater than @n. */ -long strnlen_user(const char __user *s, long n) +long direct_strnlen_user(const char __user *s, long n) { unsigned long mask = -__addr_ok(s); unsigned long res, tmp; @@ -575,3 +575,4 @@ unsigned long __copy_from_user_ll(void * n = __copy_user_zeroing_intel(to, (const void *) from, n); return n; } + --- linux-2.6.3/arch/i386/Makefile 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/Makefile 2004-02-17 22:25:07.000000000 -0800 @@ -19,7 +19,7 @@ LDFLAGS := -m elf_i386 OBJCOPYFLAGS := -O binary -R .note -R .comment -S LDFLAGS_vmlinux := -CFLAGS += -pipe +CFLAGS += -pipe -msoft-float # prevent gcc from keeping the stack 16 byte aligned CFLAGS += $(call check_gcc,-mpreferred-stack-boundary=2,) @@ -34,8 +34,9 @@ cflags-$(CONFIG_M586MMX) += $(call check cflags-$(CONFIG_M686) += -march=i686 cflags-$(CONFIG_MPENTIUMII) += $(call check_gcc,-march=pentium2,-march=i686) cflags-$(CONFIG_MPENTIUMIII) += $(call check_gcc,-march=pentium3,-march=i686) +cflags-$(CONFIG_MPENTIUMM) += $(call check_gcc,-march=pentium3,-march=i686) cflags-$(CONFIG_MPENTIUM4) += $(call check_gcc,-march=pentium4,-march=i686) -cflags-$(CONFIG_MK6) += $(call check_gcc,-march=k6,-march=i586) +cflags-$(CONFIG_MK6) += -march=k6 # Please note, that patches that add -march=athlon-xp and friends are pointless. # They make zero difference whatsosever to performance at this time. cflags-$(CONFIG_MK7) += $(call check_gcc,-march=athlon,-march=i686 $(align)-functions=4) @@ -47,6 +48,18 @@ cflags-$(CONFIG_MWINCHIP3D) += $(call ch cflags-$(CONFIG_MCYRIXIII) += $(call check_gcc,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MVIAC3_2) += $(call check_gcc,-march=c3-2,-march=i686) +# AMD Elan support +cflags-$(CONFIG_X86_ELAN) += -march=i486 + +# -mregparm=3 works ok on gcc-3.0 and later +# +GCC_VERSION := $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC)) +cflags-$(CONFIG_REGPARM) += $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;) + +# Enable unit-at-a-time mode when possible. It shrinks the +# kernel considerably. +CFLAGS += $(call check_gcc,-funit-at-a-time,) + CFLAGS += $(cflags-y) # Default subarch .c files @@ -84,6 +97,9 @@ mcore-$(CONFIG_X86_ES7000) := mach-es700 # default subarch .h files mflags-y += -Iinclude/asm-i386/mach-default +mflags-$(CONFIG_KGDB) += -gdwarf-2 +mflags-$(CONFIG_KGDB_MORE) += $(shell echo $(CONFIG_KGDB_OPTIONS) | sed -e 's/"//g') + head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o libs-y += arch/i386/lib/ --- linux-2.6.3/arch/i386/math-emu/fpu_system.h 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/math-emu/fpu_system.h 2004-02-17 22:26:07.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include /* This sets the pointer FPU_info to point to the argument part of the stack frame of math_emulate() */ @@ -22,7 +23,7 @@ /* s is always from a cpu register, and the cpu does bounds checking * during register load --> no further bounds checks needed */ -#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) +#define LDT_DESCRIPTOR(s) (((struct desc_struct *)__kmap_atomic_vaddr(KM_LDT_PAGE0))[(s) >> 3]) #define SEG_D_SIZE(x) ((x).b & (3 << 21)) #define SEG_G_BIT(x) ((x).b & (1 << 23)) #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) --- linux-2.6.3/arch/i386/mm/fault.c 2003-12-17 21:20:01.000000000 -0800 +++ 25/arch/i386/mm/fault.c 2004-02-17 22:26:07.000000000 -0800 @@ -27,6 +27,7 @@ #include #include #include +#include extern void die(const char *,struct pt_regs *,long); @@ -104,8 +105,17 @@ static inline unsigned long get_segment_ if (seg & (1<<2)) { /* Must lock the LDT while reading it. */ down(¤t->mm->context.sem); +#if 1 + /* horrible hack for 4/4 disabled kernels. + I'm not quite sure what the TLB flush is good for, + it's mindlessly copied from the read_ldt code */ + __flush_tlb_global(); + desc = kmap(current->mm->context.ldt_pages[(seg&~7)/PAGE_SIZE]); + desc = (void *)desc + ((seg & ~7) % PAGE_SIZE); +#else desc = current->mm->context.ldt; desc = (void *)desc + (seg & ~7); +#endif } else { /* Must disable preemption while reading the GDT. */ desc = (u32 *)&cpu_gdt_table[get_cpu()]; @@ -118,6 +128,9 @@ static inline unsigned long get_segment_ (desc[1] & 0xff000000); if (seg & (1<<2)) { +#if 1 + kunmap((void *)((unsigned long)desc & PAGE_MASK)); +#endif up(¤t->mm->context.sem); } else put_cpu(); @@ -243,6 +256,19 @@ asmlinkage void do_page_fault(struct pt_ * (error_code & 4) == 0, and that the fault was not a * protection error (error_code & 1) == 0. */ +#ifdef CONFIG_X86_4G + /* + * On 4/4 all kernels faults are either bugs, vmalloc or prefetch + */ + if (unlikely((regs->xcs & 3) == 0)) { + if (error_code & 3) + goto bad_area_nosemaphore; + + /* If it's vm86 fall through */ + if (!(regs->eflags & VM_MASK)) + goto vmalloc_fault; + } +#else if (unlikely(address >= TASK_SIZE)) { if (!(error_code & 5)) goto vmalloc_fault; @@ -252,6 +278,7 @@ asmlinkage void do_page_fault(struct pt_ */ goto bad_area_nosemaphore; } +#endif mm = tsk->mm; @@ -403,6 +430,12 @@ no_context: * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ +#ifdef CONFIG_KGDB + if (!user_mode(regs)){ + kgdb_handle_exception(14,SIGBUS, error_code, regs); + return; + } +#endif bust_spinlocks(1); --- linux-2.6.3/arch/i386/mm/hugetlbpage.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/mm/hugetlbpage.c 2004-02-17 22:26:01.000000000 -0800 @@ -61,6 +61,27 @@ static struct page *alloc_fresh_huge_pag static void free_huge_page(struct page *page); +#ifdef CONFIG_NUMA + +static inline void huge_inc_rss(struct mm_struct *mm, struct page *page) +{ + mm->rss += (HPAGE_SIZE / PAGE_SIZE); + mm->pernode_rss[page_to_nid(page)] += (HPAGE_SIZE / PAGE_SIZE); +} + +static inline void huge_dec_rss(struct mm_struct *mm, struct page *page) +{ + mm->rss -= (HPAGE_SIZE / PAGE_SIZE); + mm->pernode_rss[page_to_nid(page)] -= (HPAGE_SIZE / PAGE_SIZE); +} + +#else /* !CONFIG_NUMA */ + +#define huge_inc_rss(mm, page) ((mm)->rss += (HPAGE_SIZE / PAGE_SIZE)) +#define huge_dec_rss(mm, page) ((mm)->rss -= (HPAGE_SIZE / PAGE_SIZE)) + +#endif /* CONFIG_NUMA */ + static struct page *alloc_hugetlb_page(void) { int i; @@ -105,7 +126,7 @@ static void set_huge_pte(struct mm_struc { pte_t entry; - mm->rss += (HPAGE_SIZE / PAGE_SIZE); + huge_inc_rss(mm, page); if (write_access) { entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); @@ -145,7 +166,7 @@ int copy_hugetlb_page_range(struct mm_st ptepage = pte_page(entry); get_page(ptepage); set_pte(dst_pte, entry); - dst->rss += (HPAGE_SIZE / PAGE_SIZE); + huge_inc_rss(dst, ptepage); addr += HPAGE_SIZE; } return 0; @@ -314,8 +335,8 @@ void unmap_hugepage_range(struct vm_area page = pte_page(*pte); huge_page_release(page); pte_clear(pte); + huge_dec_rss(mm, page); } - mm->rss -= (end - start) >> PAGE_SHIFT; flush_tlb_range(vma, start, end); } --- linux-2.6.3/arch/i386/mm/init.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/mm/init.c 2004-02-17 22:26:08.000000000 -0800 @@ -40,125 +40,13 @@ #include #include #include +#include DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; static int do_test_wp_bit(void); -/* - * Creates a middle page table and puts a pointer to it in the - * given global directory entry. This only returns the gd entry - * in non-PAE compilation mode, since the middle layer is folded. - */ -static pmd_t * __init one_md_table_init(pgd_t *pgd) -{ - pmd_t *pmd_table; - -#ifdef CONFIG_X86_PAE - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - if (pmd_table != pmd_offset(pgd, 0)) - BUG(); -#else - pmd_table = pmd_offset(pgd, 0); -#endif - - return pmd_table; -} - -/* - * Create a page table and place a pointer to it in a middle page - * directory entry. - */ -static pte_t * __init one_page_table_init(pmd_t *pmd) -{ - if (pmd_none(*pmd)) { - pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); - if (page_table != pte_offset_kernel(pmd, 0)) - BUG(); - - return page_table; - } - - return pte_offset_kernel(pmd, 0); -} - -/* - * This function initializes a certain range of kernel virtual memory - * with new bootmem page tables, everywhere page tables are missing in - * the given range. - */ - -/* - * NOTE: The pagetables are allocated contiguous on the physical space - * so we can cache the place of the first one and move around without - * checking the pgd every time. - */ -static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) -{ - pgd_t *pgd; - pmd_t *pmd; - int pgd_idx, pmd_idx; - unsigned long vaddr; - - vaddr = start; - pgd_idx = pgd_index(vaddr); - pmd_idx = pmd_index(vaddr); - pgd = pgd_base + pgd_idx; - - for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - if (pgd_none(*pgd)) - one_md_table_init(pgd); - - pmd = pmd_offset(pgd, vaddr); - for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { - if (pmd_none(*pmd)) - one_page_table_init(pmd); - - vaddr += PMD_SIZE; - } - pmd_idx = 0; - } -} - -/* - * This maps the physical memory to kernel virtual address space, a total - * of max_low_pfn pages, by creating page tables starting from address - * PAGE_OFFSET. - */ -static void __init kernel_physical_mapping_init(pgd_t *pgd_base) -{ - unsigned long pfn; - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int pgd_idx, pmd_idx, pte_ofs; - - pgd_idx = pgd_index(PAGE_OFFSET); - pgd = pgd_base + pgd_idx; - pfn = 0; - - for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { - pmd = one_md_table_init(pgd); - if (pfn >= max_low_pfn) - continue; - for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { - /* Map with big pages if possible, otherwise create normal page tables. */ - if (cpu_has_pse) { - set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); - pfn += PTRS_PER_PTE; - } else { - pte = one_page_table_init(pmd); - - for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); - } - } - } -} - static inline int page_kills_ppro(unsigned long pagenr) { if (pagenr >= 0x70000 && pagenr <= 0x7003F) @@ -206,11 +94,8 @@ static inline int page_is_ram(unsigned l return 0; } -#ifdef CONFIG_HIGHMEM pte_t *kmap_pte; -pgprot_t kmap_prot; -EXPORT_SYMBOL(kmap_prot); EXPORT_SYMBOL(kmap_pte); #define kmap_get_fixmap_pte(vaddr) \ @@ -218,29 +103,7 @@ EXPORT_SYMBOL(kmap_pte); void __init kmap_init(void) { - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; -} - -void __init permanent_kmaps_init(pgd_t *pgd_base) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - unsigned long vaddr; - - vaddr = PKMAP_BASE; - page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - - pgd = swapper_pg_dir + pgd_index(vaddr); - pmd = pmd_offset(pgd, vaddr); - pte = pte_offset_kernel(pmd, vaddr); - pkmap_page_table = pte; + kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN)); } void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) @@ -255,6 +118,8 @@ void __init one_highpage_init(struct pag SetPageReserved(page); } +#ifdef CONFIG_HIGHMEM + #ifndef CONFIG_DISCONTIGMEM void __init set_highmem_pages_init(int bad_ppro) { @@ -266,12 +131,9 @@ void __init set_highmem_pages_init(int b #else extern void set_highmem_pages_init(int); #endif /* !CONFIG_DISCONTIGMEM */ - #else -#define kmap_init() do { } while (0) -#define permanent_kmaps_init(pgd_base) do { } while (0) -#define set_highmem_pages_init(bad_ppro) do { } while (0) -#endif /* CONFIG_HIGHMEM */ +# define set_highmem_pages_init(bad_ppro) do { } while (0) +#endif unsigned long __PAGE_KERNEL = _PAGE_KERNEL; @@ -281,30 +143,125 @@ unsigned long __PAGE_KERNEL = _PAGE_KERN extern void __init remap_numa_kva(void); #endif -static void __init pagetable_init (void) +static __init void prepare_pagetables(pgd_t *pgd_base, unsigned long address) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_base + pgd_index(address); + pmd = pmd_offset(pgd, address); + if (!pmd_present(*pmd)) { + pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); + } +} + +static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base) { unsigned long vaddr; - pgd_t *pgd_base = swapper_pg_dir; + for (vaddr = start; vaddr != end; vaddr += PAGE_SIZE) + prepare_pagetables(pgd_base, vaddr); +} + +void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end) +{ + unsigned long vaddr; + pgd_t *pgd; + int i, j, k; + pmd_t *pmd; + pte_t *pte, *pte_base; + + pgd = pgd_base; + + for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { + vaddr = i*PGDIR_SIZE; + if (end && (vaddr >= end)) + break; + pmd = pmd_offset(pgd, 0); + for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { + vaddr = i*PGDIR_SIZE + j*PMD_SIZE; + if (end && (vaddr >= end)) + break; + if (vaddr < start) + continue; + if (cpu_has_pse) { + unsigned long __pe; + + set_in_cr4(X86_CR4_PSE); + boot_cpu_data.wp_works_ok = 1; + __pe = _KERNPG_TABLE + _PAGE_PSE + vaddr - start; + /* Make it "global" too if supported */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); +#if !defined(CONFIG_X86_SWITCH_PAGETABLES) + __pe += _PAGE_GLOBAL; + __PAGE_KERNEL |= _PAGE_GLOBAL; +#endif + } + set_pmd(pmd, __pmd(__pe)); + continue; + } + if (!pmd_present(*pmd)) + pte_base = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + else + pte_base = (pte_t *) page_address(pmd_page(*pmd)); + pte = pte_base; + for (k = 0; k < PTRS_PER_PTE; pte++, k++) { + vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; + if (end && (vaddr >= end)) + break; + if (vaddr < start) + continue; + *pte = mk_pte_phys(vaddr-start, PAGE_KERNEL); + } + set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); + } + } +} + +static void __init pagetable_init (void) +{ + unsigned long vaddr, end; + pgd_t *pgd_base; #ifdef CONFIG_X86_PAE int i; - /* Init entries of the first-level page table to the zero page */ - for (i = 0; i < PTRS_PER_PGD; i++) - set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); #endif - /* Enable PSE if available */ - if (cpu_has_pse) { - set_in_cr4(X86_CR4_PSE); - } + /* + * This can be zero as well - no problem, in that case we exit + * the loops anyway due to the PTRS_PER_* conditions. + */ + end = (unsigned long)__va(max_low_pfn*PAGE_SIZE); - /* Enable PGE if available */ - if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); - __PAGE_KERNEL |= _PAGE_GLOBAL; + pgd_base = swapper_pg_dir; +#ifdef CONFIG_X86_PAE + /* + * It causes too many problems if there's no proper pmd set up + * for all 4 entries of the PGD - so we allocate all of them. + * PAE systems will not miss this extra 4-8K anyway ... + */ + for (i = 0; i < PTRS_PER_PGD; i++) { + pmd_t *pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pgd(pgd_base + i, __pgd(__pa(pmd) + 0x1)); } +#endif + /* + * Set up lowmem-sized identity mappings at PAGE_OFFSET: + */ + setup_identity_mappings(pgd_base, PAGE_OFFSET, end); - kernel_physical_mapping_init(pgd_base); + /* + * Add flat-mode identity-mappings - SMP needs it when + * starting up on an AP from real-mode. (In the non-PAE + * case we already have these mappings through head.S.) + * All user-space mappings are explicitly cleared after + * SMP startup. + */ +#if defined(CONFIG_SMP) && defined(CONFIG_X86_PAE) + setup_identity_mappings(pgd_base, 0, 16*1024*1024); +#endif remap_numa_kva(); /* @@ -312,38 +269,64 @@ static void __init pagetable_init (void) * created - mappings will be set by set_fixmap(): */ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - page_table_range_init(vaddr, 0, pgd_base); + fixrange_init(vaddr, 0, pgd_base); - permanent_kmaps_init(pgd_base); +#ifdef CONFIG_HIGHMEM + { + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; -#ifdef CONFIG_X86_PAE - /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. - */ - pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; + /* + * Permanent kmaps: + */ + vaddr = PKMAP_BASE; + fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + + pgd = swapper_pg_dir + pgd_index(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + pkmap_page_table = pte; + } #endif } -void zap_low_mappings (void) +/* + * Clear kernel pagetables in a PMD_SIZE-aligned range. + */ +static void clear_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end) { - int i; + unsigned long vaddr; + pgd_t *pgd; + pmd_t *pmd; + int i, j; + + pgd = pgd_base; + + for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { + vaddr = i*PGDIR_SIZE; + if (end && (vaddr >= end)) + break; + pmd = pmd_offset(pgd, 0); + for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { + vaddr = i*PGDIR_SIZE + j*PMD_SIZE; + if (end && (vaddr >= end)) + break; + if (vaddr < start) + continue; + pmd_clear(pmd); + } + } + flush_tlb_all(); +} + +void zap_low_mappings(void) +{ + printk("zapping low mappings.\n"); /* * Zap initial low-memory mappings. - * - * Note that "pgd_clear()" doesn't do it for - * us, because pgd_clear() is a no-op on i386. */ - for (i = 0; i < USER_PTRS_PER_PGD; i++) -#ifdef CONFIG_X86_PAE - set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); -#else - set_pgd(swapper_pg_dir+i, __pgd(0)); -#endif - flush_tlb_all(); + clear_mappings(swapper_pg_dir, 0, 16*1024*1024); } #ifndef CONFIG_DISCONTIGMEM @@ -393,7 +376,15 @@ void __init paging_init(void) set_in_cr4(X86_CR4_PAE); #endif __flush_tlb_all(); - + /* + * Subtle. SMP is doing it's boot stuff late (because it has to + * fork idle threads) - but it also needs low mappings for the + * protected-mode entry to work. We zap these entries only after + * the WP-bit has been tested. + */ +#ifndef CONFIG_SMP + zap_low_mappings(); +#endif kmap_init(); zone_sizes_init(); } @@ -515,22 +506,18 @@ void __init mem_init(void) if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); - /* - * Subtle. SMP is doing it's boot stuff late (because it has to - * fork idle threads) - but it also needs low mappings for the - * protected-mode entry to work. We zap these entries only after - * the WP-bit has been tested. - */ -#ifndef CONFIG_SMP - zap_low_mappings(); -#endif + entry_trampoline_setup(); + default_ldt_page = virt_to_page(default_ldt); + load_LDT(&init_mm.context); } -kmem_cache_t *pgd_cache; -kmem_cache_t *pmd_cache; +kmem_cache_t *pgd_cache, *pmd_cache, *kpmd_cache; void __init pgtable_cache_init(void) { + void (*ctor)(void *, kmem_cache_t *, unsigned long); + void (*dtor)(void *, kmem_cache_t *, unsigned long); + if (PTRS_PER_PMD > 1) { pmd_cache = kmem_cache_create("pmd", PTRS_PER_PMD*sizeof(pmd_t), @@ -540,13 +527,36 @@ void __init pgtable_cache_init(void) NULL); if (!pmd_cache) panic("pgtable_cache_init(): cannot create pmd cache"); + + if (TASK_SIZE > PAGE_OFFSET) { + kpmd_cache = kmem_cache_create("kpmd", + PTRS_PER_PMD*sizeof(pmd_t), + 0, + SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, + kpmd_ctor, + NULL); + if (!kpmd_cache) + panic("pgtable_cache_init(): " + "cannot create kpmd cache"); + } } + + if (PTRS_PER_PMD == 1 || TASK_SIZE <= PAGE_OFFSET) + ctor = pgd_ctor; + else + ctor = NULL; + + if (PTRS_PER_PMD == 1 && TASK_SIZE <= PAGE_OFFSET) + dtor = pgd_dtor; + else + dtor = NULL; + pgd_cache = kmem_cache_create("pgd", PTRS_PER_PGD*sizeof(pgd_t), 0, SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, - pgd_ctor, - PTRS_PER_PMD == 1 ? pgd_dtor : NULL); + ctor, + dtor); if (!pgd_cache) panic("pgtable_cache_init(): Cannot create pgd cache"); } --- linux-2.6.3/arch/i386/mm/pgtable.c 2003-11-09 16:45:05.000000000 -0800 +++ 25/arch/i386/mm/pgtable.c 2004-02-17 22:26:07.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include void show_mem(void) { @@ -157,11 +158,20 @@ void pmd_ctor(void *pmd, kmem_cache_t *c memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); } +void kpmd_ctor(void *__pmd, kmem_cache_t *cache, unsigned long flags) +{ + pmd_t *kpmd, *pmd; + kpmd = pmd_offset(&swapper_pg_dir[PTRS_PER_PGD-1], + (PTRS_PER_PMD - NR_SHARED_PMDS)*PMD_SIZE); + pmd = (pmd_t *)__pmd + (PTRS_PER_PMD - NR_SHARED_PMDS); + + memset(__pmd, 0, (PTRS_PER_PMD - NR_SHARED_PMDS)*sizeof(pmd_t)); + memcpy(pmd, kpmd, NR_SHARED_PMDS*sizeof(pmd_t)); +} + /* - * List of all pgd's needed for non-PAE so it can invalidate entries - * in both cached and uncached pgd's; not needed for PAE since the - * kernel pmd is shared. If PAE were not to share the pmd a similar - * tactic would be needed. This is essentially codepath-based locking + * List of all pgd's needed so it can invalidate entries in both cached + * and uncached pgd's. This is essentially codepath-based locking * against pageattr.c; it is the unique case in which a valid change * of kernel pagetables can't be lazily synchronized by vmalloc faults. * vmalloc faults work because attached pagetables are never freed. @@ -170,30 +180,60 @@ void pmd_ctor(void *pmd, kmem_cache_t *c * could be used. The locking scheme was chosen on the basis of * manfred's recommendations and having no core impact whatsoever. * -- wli + * + * The entire issue goes away when XKVA is configured. */ spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED; LIST_HEAD(pgd_list); -void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) +/* + * This is not that hard to figure out. + * (a) PTRS_PER_PMD == 1 means non-PAE. + * (b) PTRS_PER_PMD > 1 means PAE. + * (c) TASK_SIZE > PAGE_OFFSET means XKVA. + * (d) TASK_SIZE <= PAGE_OFFSET means non-XKVA. + * + * Do *NOT* back out the preconstruction like the patch I'm cleaning + * up after this very instant did, or at all, for that matter. + * This is never called when PTRS_PER_PMD > 1 && TASK_SIZE > PAGE_OFFSET. + * -- wli + */ +void pgd_ctor(void *__pgd, kmem_cache_t *cache, unsigned long unused) { + pgd_t *pgd = (pgd_t *)__pgd; unsigned long flags; - if (PTRS_PER_PMD == 1) - spin_lock_irqsave(&pgd_lock, flags); + if (PTRS_PER_PMD == 1) { + if (TASK_SIZE <= PAGE_OFFSET) + spin_lock_irqsave(&pgd_lock, flags); + else + memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS], + &swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS], + NR_SHARED_PMDS * sizeof(pgd_t)); + } - memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + if (TASK_SIZE <= PAGE_OFFSET) + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); if (PTRS_PER_PMD > 1) return; - list_add(&virt_to_page(pgd)->lru, &pgd_list); - spin_unlock_irqrestore(&pgd_lock, flags); - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); + if (TASK_SIZE > PAGE_OFFSET) + memset(pgd, 0, (PTRS_PER_PGD - NR_SHARED_PMDS)*sizeof(pgd_t)); + else { + list_add(&virt_to_page(pgd)->lru, &pgd_list); + spin_unlock_irqrestore(&pgd_lock, flags); + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); + } } -/* never called when PTRS_PER_PMD > 1 */ +/* + * Never called when PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET + * for with PAE we would list_del() multiple times, and for non-PAE + * with XKVA all the AGP pgd shootdown code is unnecessary. + */ void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) { unsigned long flags; /* can be called from interrupt context */ @@ -203,6 +243,12 @@ void pgd_dtor(void *pgd, kmem_cache_t *c spin_unlock_irqrestore(&pgd_lock, flags); } +/* + * See the comments above pgd_ctor() wrt. preconstruction. + * Do *NOT* memcpy() here. If you do, you back out important + * anti- cache pollution code. + * + */ pgd_t *pgd_alloc(struct mm_struct *mm) { int i; @@ -211,15 +257,33 @@ pgd_t *pgd_alloc(struct mm_struct *mm) if (PTRS_PER_PMD == 1 || !pgd) return pgd; + /* + * In the 4G userspace case alias the top 16 MB virtual + * memory range into the user mappings as well (these + * include the trampoline and CPU data structures). + */ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { - pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); + kmem_cache_t *cache; + pmd_t *pmd; + + if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1) + cache = kpmd_cache; + else + cache = pmd_cache; + + pmd = kmem_cache_alloc(cache, GFP_KERNEL); if (!pmd) goto out_oom; set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd)))); } - return pgd; + return pgd; out_oom: + /* + * we don't have to handle the kpmd_cache here, since it's the + * last allocation, and has either nothing to free or when it + * succeeds the whole operation succeeds. + */ for (i--; i >= 0; i--) kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); kmem_cache_free(pgd_cache, pgd); @@ -230,10 +294,29 @@ void pgd_free(pgd_t *pgd) { int i; - /* in the PAE case user pgd entries are overwritten before usage */ - if (PTRS_PER_PMD > 1) - for (i = 0; i < USER_PTRS_PER_PGD; ++i) - kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); /* in the non-PAE case, clear_page_tables() clears user pgd entries */ + if (PTRS_PER_PMD == 1) + goto out_free; + + /* in the PAE case user pgd entries are overwritten before usage */ + for (i = 0; i < USER_PTRS_PER_PGD; ++i) { + kmem_cache_t *cache; + pmd_t *pmd = __va(pgd_val(pgd[i]) - 1); + + /* + * only userspace pmd's are cleared for us + * by mm/memory.c; it's a slab cache invariant + * that we must separate the kernel pmd slab + * all times, else we'll have bad pmd's. + */ + if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1) + cache = kpmd_cache; + else + cache = pmd_cache; + + kmem_cache_free(cache, pmd); + } +out_free: kmem_cache_free(pgd_cache, pgd); } + --- linux-2.6.3/arch/i386/oprofile/nmi_int.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/oprofile/nmi_int.c 2004-02-17 22:25:47.000000000 -0800 @@ -335,7 +335,9 @@ static int __init ppro_init(void) if (cpu_model > 0xd) return 0; - if (cpu_model > 5) { + if (cpu_model == 9) { + nmi_ops.cpu_type = "i386/p6_mobile"; + } else if (cpu_model > 5) { nmi_ops.cpu_type = "i386/piii"; } else if (cpu_model > 2) { nmi_ops.cpu_type = "i386/pii"; --- linux-2.6.3/arch/i386/oprofile/nmi_timer_int.c 2003-06-22 12:04:43.000000000 -0700 +++ 25/arch/i386/oprofile/nmi_timer_int.c 2004-02-17 22:25:47.000000000 -0800 @@ -48,9 +48,13 @@ static struct oprofile_operations nmi_ti .cpu_type = "timer" }; - int __init nmi_timer_init(struct oprofile_operations ** ops) { + extern int nmi_active; + + if (nmi_active <= 0) + return -ENODEV; + *ops = &nmi_timer_ops; printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); return 0; --- linux-2.6.3/arch/i386/oprofile/op_model_p4.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/i386/oprofile/op_model_p4.c 2004-02-17 22:09:01.000000000 -0800 @@ -382,11 +382,8 @@ static struct p4_event_binding p4_events static unsigned int get_stagger(void) { #ifdef CONFIG_SMP - int cpu; - if (smp_num_siblings > 1) { - cpu = smp_processor_id(); - return (cpu_sibling_map[cpu] > cpu) ? 0 : 1; - } + int cpu = smp_processor_id(); + return (cpu != first_cpu(cpu_sibling_map[cpu])); #endif return 0; } --- linux-2.6.3/arch/i386/oprofile/op_model_ppro.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/i386/oprofile/op_model_ppro.c 2004-02-17 22:25:47.000000000 -0800 @@ -13,6 +13,7 @@ #include #include #include +#include #include "op_x86_model.h" #include "op_counter.h" @@ -101,6 +102,10 @@ static int ppro_check_ctrs(unsigned int } } + /* Only P6 based Pentium M need to re-unmask the apic vector but it + * doesn't hurt other P6 variant */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + /* We can't work out if we really handled an interrupt. We * might have caught a *second* counter just after overflowing * the interrupt for this counter then arrives --- linux-2.6.3/arch/i386/pci/common.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/pci/common.c 2004-02-17 22:25:59.000000000 -0800 @@ -20,7 +20,8 @@ extern void pcibios_sort(void); #endif -unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2; +unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | + PCI_PROBE_MMCONF; int pcibios_last_bus = -1; struct pci_bus *pci_root_bus = NULL; @@ -198,6 +199,12 @@ char * __devinit pcibios_setup(char *st return NULL; } #endif +#ifdef CONFIG_PCI_MMCONFIG + else if (!strcmp(str, "nommconf")) { + pci_probe &= ~PCI_PROBE_MMCONF; + return NULL; + } +#endif else if (!strcmp(str, "noacpi")) { acpi_noirq_set(); return NULL; --- linux-2.6.3/arch/i386/pci/Makefile 2003-07-02 14:53:12.000000000 -0700 +++ 25/arch/i386/pci/Makefile 2004-02-17 22:25:59.000000000 -0800 @@ -1,6 +1,7 @@ obj-y := i386.o obj-$(CONFIG_PCI_BIOS) += pcbios.o +obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o obj-$(CONFIG_PCI_DIRECT) += direct.o pci-y := fixup.o --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/pci/mmconfig.c 2004-02-17 22:25:59.000000000 -0800 @@ -0,0 +1,109 @@ +/* + * mmconfig.c - Low-level direct PCI config space access via MMCONFIG + */ + +#include +#include +#include "pci.h" + +/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ +u32 pci_mmcfg_base_addr; + +#define mmcfg_virt_addr (fix_to_virt(FIX_PCIE_MCFG)) + +/* The base address of the last MMCONFIG device accessed */ +static u32 mmcfg_last_accessed_device; + +/* + * Functions for accessing PCI configuration space with MMCONFIG accesses + */ + +static inline void pci_exp_set_dev_base(int bus, int devfn) +{ + u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12); + if (dev_base != mmcfg_last_accessed_device) { + mmcfg_last_accessed_device = dev_base; + set_fixmap(FIX_PCIE_MCFG, dev_base); + } +} + +static int pci_mmcfg_read(int seg, int bus, int devfn, int reg, int len, u32 *value) +{ + unsigned long flags; + + if (!value || (bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + pci_exp_set_dev_base(bus, devfn); + + switch (len) { + case 1: + *value = readb(mmcfg_virt_addr + reg); + break; + case 2: + *value = readw(mmcfg_virt_addr + reg); + break; + case 4: + *value = readl(mmcfg_virt_addr + reg); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_mmcfg_write(int seg, int bus, int devfn, int reg, int len, u32 value) +{ + unsigned long flags; + + if ((bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + pci_exp_set_dev_base(bus, devfn); + + switch (len) { + case 1: + writeb(value, mmcfg_virt_addr + reg); + break; + case 2: + writew(value, mmcfg_virt_addr + reg); + break; + case 4: + writel(value, mmcfg_virt_addr + reg); + break; + } + + /* Dummy read to flush PCI write */ + readl(mmcfg_virt_addr); + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static struct pci_raw_ops pci_mmcfg = { + .read = pci_mmcfg_read, + .write = pci_mmcfg_write, +}; + +static int __init pci_mmcfg_init(void) +{ + if ((pci_probe & PCI_PROBE_MMCONF) == 0) + goto out; + if (!pci_mmcfg_base_addr) + goto out; + + printk(KERN_INFO "PCI: Using MMCONFIG\n"); + raw_pci_ops = &pci_mmcfg; + pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; + + out: + return 0; +} + +arch_initcall(pci_mmcfg_init); --- linux-2.6.3/arch/i386/pci/pci.h 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/pci/pci.h 2004-02-17 22:25:59.000000000 -0800 @@ -15,6 +15,9 @@ #define PCI_PROBE_BIOS 0x0001 #define PCI_PROBE_CONF1 0x0002 #define PCI_PROBE_CONF2 0x0004 +#define PCI_PROBE_MMCONF 0x0008 +#define PCI_PROBE_MASK 0x000f + #define PCI_NO_SORT 0x0100 #define PCI_BIOS_SORT 0x0200 #define PCI_NO_CHECKS 0x0400 --- linux-2.6.3/arch/ia64/hp/sim/simeth.c 2003-07-27 12:14:38.000000000 -0700 +++ 25/arch/ia64/hp/sim/simeth.c 2004-02-17 22:06:13.000000000 -0800 @@ -224,7 +224,7 @@ simeth_probe1(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.3/arch/ia64/ia32/ia32_ioctl.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/ia64/ia32/ia32_ioctl.c 2004-02-17 22:25:22.000000000 -0800 @@ -8,6 +8,7 @@ */ #include /* argh, msdos_fs.h isn't self-contained... */ +#include #include "ia32priv.h" #define INCLUDES @@ -26,8 +27,6 @@ _ret; \ }) -asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); - #define CODE #include "compat_ioctl.c" --- linux-2.6.3/arch/ia64/ia32/ia32_signal.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/ia64/ia32/ia32_signal.c 2004-02-17 22:25:24.000000000 -0800 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -550,9 +551,6 @@ sys32_rt_sigaction (int sig, struct siga } -extern asmlinkage long sys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, - size_t sigsetsize); - asmlinkage long sys32_rt_sigprocmask (int how, compat_sigset_t *set, compat_sigset_t *oset, unsigned int sigsetsize) { @@ -584,8 +582,6 @@ asmlinkage long sys32_rt_sigtimedwait (compat_sigset_t *uthese, siginfo_t32 *uinfo, struct compat_timespec *uts, unsigned int sigsetsize) { - extern asmlinkage long sys_rt_sigtimedwait (const sigset_t *, siginfo_t *, - const struct timespec *, size_t); extern int copy_siginfo_to_user32 (siginfo_t32 *, siginfo_t *); mm_segment_t old_fs = get_fs(); struct timespec t; @@ -611,7 +607,6 @@ sys32_rt_sigtimedwait (compat_sigset_t * asmlinkage long sys32_rt_sigqueueinfo (int pid, int sig, siginfo_t32 *uinfo) { - extern asmlinkage long sys_rt_sigqueueinfo (int, int, siginfo_t *); extern int copy_siginfo_from_user32 (siginfo_t *to, siginfo_t32 *from); mm_segment_t old_fs = get_fs(); siginfo_t info; --- linux-2.6.3/arch/ia64/ia32/sys_ia32.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/ia32/sys_ia32.c 2004-02-17 22:25:30.000000000 -0800 @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -55,6 +56,7 @@ #include #include #include +#include #include "ia32priv.h" @@ -81,16 +83,9 @@ #define high2lowuid(uid) ((uid) > 65535 ? 65534 : (uid)) #define high2lowgid(gid) ((gid) > 65535 ? 65534 : (gid)) -extern asmlinkage long sys_execve (char *, char **, char **, struct pt_regs *); -extern asmlinkage long sys_mprotect (unsigned long, size_t, unsigned long); -extern asmlinkage long sys_munmap (unsigned long, size_t); extern unsigned long arch_get_unmapped_area (struct file *, unsigned long, unsigned long, unsigned long, unsigned long); -/* forward declaration: */ -asmlinkage long sys32_mprotect (unsigned int, unsigned int, int); -asmlinkage unsigned long sys_brk(unsigned long); - /* * Anything that modifies or inspects ia32 user virtual memory must hold this semaphore * while doing so. @@ -949,9 +944,6 @@ sys32_old_select (struct sel_arg_struct (struct compat_timeval *) A(a.tvp)); } -asmlinkage ssize_t sys_readv (unsigned long,const struct iovec *,unsigned long); -asmlinkage ssize_t sys_writev (unsigned long,const struct iovec *,unsigned long); - static struct iovec * get_compat_iovec (struct compat_iovec *iov32, struct iovec *iov_buf, u32 count, int type) { @@ -2023,9 +2015,6 @@ restore_ia32_fpxstate (struct task_struc return 0; } -extern asmlinkage long sys_ptrace (long, pid_t, unsigned long, unsigned long, long, long, long, - long, long); - /* * Note that the IA32 version of `ptrace' calls the IA64 routine for * many of the requests. This will only work for requests that do @@ -2284,8 +2273,6 @@ sys32_pause (void) return -ERESTARTNOHAND; } -asmlinkage long sys_msync (unsigned long start, size_t len, int flags); - asmlinkage int sys32_msync (unsigned int start, unsigned int len, int flags) { @@ -2307,8 +2294,6 @@ struct sysctl32 { unsigned int __unused[4]; }; -extern asmlinkage long sys_sysctl(struct __sysctl_args *args); - asmlinkage long sys32_sysctl (struct sysctl32 *args) { @@ -2358,7 +2343,6 @@ sys32_sysctl (struct sysctl32 *args) asmlinkage long sys32_newuname (struct new_utsname *name) { - extern asmlinkage long sys_newuname(struct new_utsname * name); int ret = sys_newuname(name); if (!ret) @@ -2367,8 +2351,6 @@ sys32_newuname (struct new_utsname *name return ret; } -extern asmlinkage long sys_getresuid (uid_t *ruid, uid_t *euid, uid_t *suid); - asmlinkage long sys32_getresuid16 (u16 *ruid, u16 *euid, u16 *suid) { @@ -2385,8 +2367,6 @@ sys32_getresuid16 (u16 *ruid, u16 *euid, return ret; } -extern asmlinkage long sys_getresgid (gid_t *rgid, gid_t *egid, gid_t *sgid); - asmlinkage long sys32_getresgid16 (u16 *rgid, u16 *egid, u16 *sgid) { @@ -2407,65 +2387,100 @@ sys32_getresgid16 (u16 *rgid, u16 *egid, asmlinkage long sys32_lseek (unsigned int fd, int offset, unsigned int whence) { - extern off_t sys_lseek (unsigned int fd, off_t offset, unsigned int origin); - /* Sign-extension of "offset" is important here... */ return sys_lseek(fd, offset, whence); } -extern asmlinkage long sys_getgroups (int gidsetsize, gid_t *grouplist); +static int +groups16_to_user(short *grouplist, struct group_info *group_info) +{ + int i; + short group; + + for (i = 0; i < group_info->ngroups; i++) { + group = (short)GROUP_AT(group_info, i); + if (put_user(group, grouplist+i)) + return -EFAULT; + } + + return 0; +} + +static int +groups16_from_user(struct group_info *group_info, short *grouplist) +{ + int i; + short group; + + for (i = 0; i < group_info->ngroups; i++) { + if (get_user(group, grouplist+i)) + return -EFAULT; + GROUP_AT(group_info, i) = (gid_t)group; + } + + return 0; +} asmlinkage long sys32_getgroups16 (int gidsetsize, short *grouplist) { - mm_segment_t old_fs = get_fs(); - gid_t gl[NGROUPS]; - int ret, i; + int i; - set_fs(KERNEL_DS); - ret = sys_getgroups(gidsetsize, gl); - set_fs(old_fs); + if (gidsetsize < 0) + return -EINVAL; - if (gidsetsize && ret > 0 && ret <= NGROUPS) - for (i = 0; i < ret; i++, grouplist++) - if (put_user(gl[i], grouplist)) - return -EFAULT; - return ret; + get_group_info(current->group_info); + i = current->group_info->ngroups; + if (gidsetsize) { + if (i > gidsetsize) { + i = -EINVAL; + goto out; + } + if (groups16_to_user(grouplist, current->group_info)) { + i = -EFAULT; + goto out; + } + } +out: + put_group_info(current->group_info); + return i; } -extern asmlinkage long sys_setgroups (int gidsetsize, gid_t *grouplist); - asmlinkage long sys32_setgroups16 (int gidsetsize, short *grouplist) { - mm_segment_t old_fs = get_fs(); - gid_t gl[NGROUPS]; - int ret, i; + struct group_info *group_info; + int retval; - if ((unsigned) gidsetsize > NGROUPS) + if (!capable(CAP_SETGID)) + return -EPERM; + if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; - for (i = 0; i < gidsetsize; i++, grouplist++) - if (get_user(gl[i], grouplist)) - return -EFAULT; - set_fs(KERNEL_DS); - ret = sys_setgroups(gidsetsize, gl); - set_fs(old_fs); - return ret; + + group_info = groups_alloc(gidsetsize); + if (!group_info) + return -ENOMEM; + retval = groups16_from_user(group_info, grouplist); + if (retval) { + put_group_info(group_info); + return retval; + } + + retval = set_current_groups(group_info); + put_group_info(group_info); + + return retval; } asmlinkage long sys32_truncate64 (unsigned int path, unsigned int len_lo, unsigned int len_hi) { - extern asmlinkage long sys_truncate (const char *path, unsigned long length); - return sys_truncate((const char *) A(path), ((unsigned long) len_hi << 32) | len_lo); } asmlinkage long sys32_ftruncate64 (int fd, unsigned int len_lo, unsigned int len_hi) { - extern asmlinkage long sys_ftruncate (int fd, unsigned long length); - return sys_ftruncate(fd, ((unsigned long) len_hi << 32) | len_lo); } @@ -2554,7 +2569,6 @@ struct sysinfo32 { asmlinkage long sys32_sysinfo (struct sysinfo32 *info) { - extern asmlinkage long sys_sysinfo (struct sysinfo *); struct sysinfo s; long ret, err; int bitcount = 0; @@ -2606,7 +2620,6 @@ sys32_sysinfo (struct sysinfo32 *info) asmlinkage long sys32_sched_rr_get_interval (pid_t pid, struct compat_timespec *interval) { - extern asmlinkage long sys_sched_rr_get_interval (pid_t, struct timespec *); mm_segment_t old_fs = get_fs(); struct timespec t; long ret; @@ -2622,21 +2635,18 @@ sys32_sched_rr_get_interval (pid_t pid, asmlinkage long sys32_pread (unsigned int fd, void *buf, unsigned int count, u32 pos_lo, u32 pos_hi) { - extern asmlinkage long sys_pread64 (unsigned int, char *, size_t, loff_t); return sys_pread64(fd, buf, count, ((unsigned long) pos_hi << 32) | pos_lo); } asmlinkage long sys32_pwrite (unsigned int fd, void *buf, unsigned int count, u32 pos_lo, u32 pos_hi) { - extern asmlinkage long sys_pwrite64 (unsigned int, const char *, size_t, loff_t); return sys_pwrite64(fd, buf, count, ((unsigned long) pos_hi << 32) | pos_lo); } asmlinkage long sys32_sendfile (int out_fd, int in_fd, int *offset, unsigned int count) { - extern asmlinkage long sys_sendfile (int, int, off_t *, size_t); mm_segment_t old_fs = get_fs(); long ret; off_t of; @@ -2657,7 +2667,6 @@ sys32_sendfile (int out_fd, int in_fd, i asmlinkage long sys32_personality (unsigned int personality) { - extern asmlinkage long sys_personality (unsigned long); long ret; if (current->personality == PER_LINUX32 && personality == PER_LINUX) @@ -2949,8 +2958,6 @@ sys32_timer_create(u32 clock, struct sig return err; } -extern long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice); - long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, __u32 len_low, __u32 len_high, int advice) { @@ -3049,9 +3056,6 @@ copy_mount_stuff_to_kernel(const void *u return 0; } -extern asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, - unsigned long new_flags, void *data); - #define SMBFS_NAME "smbfs" #define NCPFS_NAME "ncpfs" @@ -3116,8 +3120,6 @@ sys32_mount(char *dev_name, char *dir_na } } -extern asmlinkage long sys_setreuid(uid_t ruid, uid_t euid); - asmlinkage long sys32_setreuid(compat_uid_t ruid, compat_uid_t euid) { uid_t sruid, seuid; @@ -3127,8 +3129,6 @@ asmlinkage long sys32_setreuid(compat_ui return sys_setreuid(sruid, seuid); } -extern asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid); - asmlinkage long sys32_setresuid(compat_uid_t ruid, compat_uid_t euid, compat_uid_t suid) @@ -3141,8 +3141,6 @@ sys32_setresuid(compat_uid_t ruid, compa return sys_setresuid(sruid, seuid, ssuid); } -extern asmlinkage long sys_setregid(gid_t rgid, gid_t egid); - asmlinkage long sys32_setregid(compat_gid_t rgid, compat_gid_t egid) { @@ -3153,8 +3151,6 @@ sys32_setregid(compat_gid_t rgid, compat return sys_setregid(srgid, segid); } -extern asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid); - asmlinkage long sys32_setresgid(compat_gid_t rgid, compat_gid_t egid, compat_gid_t sgid) @@ -3284,8 +3280,6 @@ nfs_getfh32_res_trans(union nfsctl_res * return err; } -extern asmlinkage long sys_nfsservctl(int cmd, void *arg, void *resp); - int asmlinkage sys32_nfsservctl(int cmd, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32) { --- linux-2.6.3/arch/ia64/Kconfig 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/Kconfig 2004-02-17 22:26:05.000000000 -0800 @@ -662,6 +662,13 @@ config DEBUG_INFO Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. +config LOCKMETER + bool "Kernel lock metering" + depends on SMP + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + endmenu source "security/Kconfig" --- linux-2.6.3/arch/ia64/kernel/irq.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/kernel/irq.c 2004-02-17 22:25:41.000000000 -0800 @@ -940,7 +940,7 @@ void set_irq_affinity_info (unsigned int static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -1005,7 +1005,7 @@ static int irq_affinity_write_proc (stru static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3/arch/ia64/kernel/sys_ia64.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/ia64/kernel/sys_ia64.c 2004-02-17 22:25:24.000000000 -0800 @@ -15,6 +15,7 @@ #include /* doh, must come after sched.h... */ #include #include +#include #include #include @@ -74,7 +75,6 @@ arch_get_unmapped_area (struct file *fil asmlinkage long ia64_getpriority (int which, int who) { - extern long sys_getpriority (int, int); long prio; prio = sys_getpriority(which, who); --- linux-2.6.3/arch/ia64/lib/dec_and_lock.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/ia64/lib/dec_and_lock.c 2004-02-17 22:26:06.000000000 -0800 @@ -13,6 +13,7 @@ #include #include +#ifndef CONFIG_LOCKMETER /* * Decrement REFCOUNT and if the count reaches zero, acquire the spinlock. Both of these * operations have to be done atomically, so that the count doesn't drop to zero without @@ -40,3 +41,4 @@ atomic_dec_and_lock (atomic_t *refcount, } EXPORT_SYMBOL(atomic_dec_and_lock); +#endif --- linux-2.6.3/arch/ia64/pci/pci.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/pci/pci.c 2004-02-17 22:25:59.000000000 -0800 @@ -55,8 +55,11 @@ struct pci_fixup pcibios_fixups[1]; #define PCI_SAL_ADDRESS(seg, bus, devfn, reg) \ ((u64)(seg << 24) | (u64)(bus << 16) | \ - (u64)(devfn << 8) | (u64)(reg)) + (u64)(devfn << 8) | (u64)(reg)), 0 +#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \ + ((u64)(seg << 28) | (u64)(bus << 20) | \ + (u64)(devfn << 12) | (u64)(reg)), 1 static int pci_sal_read (int seg, int bus, int devfn, int reg, int len, u32 *value) @@ -64,10 +67,14 @@ pci_sal_read (int seg, int bus, int devf int result = 0; u64 data = 0; - if (!value || (seg > 255) || (bus > 255) || (devfn > 255) || (reg > 255)) + if (!value || (seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095)) return -EINVAL; - result = ia64_sal_pci_config_read(PCI_SAL_ADDRESS(seg, bus, devfn, reg), len, &data); + if ((seg < 256) && (reg < 256)) { + result = ia64_sal_pci_config_read(PCI_SAL_ADDRESS(seg, bus, devfn, reg), len, &data); + } else { + result = ia64_sal_pci_config_read(PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg), len, &data); + } *value = (u32) data; @@ -77,13 +84,17 @@ pci_sal_read (int seg, int bus, int devf static int pci_sal_write (int seg, int bus, int devfn, int reg, int len, u32 value) { - if ((seg > 255) || (bus > 255) || (devfn > 255) || (reg > 255)) + if ((seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095)) return -EINVAL; - return ia64_sal_pci_config_write(PCI_SAL_ADDRESS(seg, bus, devfn, reg), len, value); + if ((seg < 256) && (reg < 256)) { + return ia64_sal_pci_config_write(PCI_SAL_ADDRESS(seg, bus, devfn, reg), len, value); + } else { + return ia64_sal_pci_config_write(PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg), len, value); + } } -struct pci_raw_ops pci_sal_ops = { +static struct pci_raw_ops pci_sal_ops = { .read = pci_sal_read, .write = pci_sal_write }; --- linux-2.6.3/arch/ia64/sn/io/drivers/ioconfig_bus.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/sn/io/drivers/ioconfig_bus.c 2004-02-17 22:25:53.000000000 -0800 @@ -16,6 +16,7 @@ #include +#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/io.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/sn/io/io.c 2004-02-17 22:25:53.000000000 -0800 @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/machvec/pci_bus_cvlink.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/sn/io/machvec/pci_bus_cvlink.c 2004-02-17 22:25:53.000000000 -0800 @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/sn2/bte_error.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/bte_error.c 2004-02-17 22:25:53.000000000 -0800 @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/sn2/geo_op.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/geo_op.c 2004-02-17 22:25:53.000000000 -0800 @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/sn2/klconflib.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/klconflib.c 2004-02-17 22:25:53.000000000 -0800 @@ -474,8 +474,6 @@ board_serial_number_get(lboard_t *board, return(0); } -#include "asm/sn/sn_private.h" - /* * Format a module id for printing. * --- linux-2.6.3/arch/ia64/sn/io/sn2/ml_SN_init.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/ml_SN_init.c 2004-02-17 22:25:53.000000000 -0800 @@ -11,12 +11,12 @@ #include #include #include -#include #include #include #include #include #include +#include int maxcpus; @@ -69,12 +69,15 @@ void init_platform_nodepda(nodepda_t *np } void -init_platform_hubinfo(nodepda_t **nodepdaindr) { +init_platform_hubinfo(nodepda_t **nodepdaindr) +{ cnodeid_t cnode; hubinfo_t hubinfo; nodepda_t *npda; extern int numionodes; + if (IS_RUNNING_ON_SIMULATOR()) + return; for (cnode = 0; cnode < numionodes; cnode++) { npda = nodepdaindr[cnode]; hubinfo = (hubinfo_t)npda->pdinfo; --- linux-2.6.3/arch/ia64/sn/io/sn2/ml_SN_intr.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/ml_SN_intr.c 2004-02-17 22:25:43.000000000 -0800 @@ -30,6 +30,7 @@ #include #include #include +#include extern irqpda_t *irqpdaindr; extern cnodeid_t master_node_get(vertex_hdl_t vhdl); @@ -216,7 +217,6 @@ static cpuid_t intr_cpu_choose_from_node { cpuid_t cpu, best_cpu = CPU_NONE; int slice, min_count = 1000; - irqpda_t *irqs; for (slice = CPUS_PER_NODE - 1; slice >= 0; slice--) { int intrs; @@ -227,8 +227,7 @@ static cpuid_t intr_cpu_choose_from_node if (!cpu_online(cpu)) continue; - irqs = irqpdaindr; - intrs = irqs->num_irq_used; + intrs = pdacpu(cpu)->sn_num_irqs; if (min_count > intrs) { min_count = intrs; @@ -243,6 +242,7 @@ static cpuid_t intr_cpu_choose_from_node } } } + pdacpu(best_cpu)->sn_num_irqs++; return best_cpu; } --- linux-2.6.3/arch/ia64/sn/io/sn2/pcibr/pcibr_ate.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/pcibr/pcibr_ate.c 2004-02-17 22:25:53.000000000 -0800 @@ -8,7 +8,6 @@ #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/sn2/pcibr/pcibr_config.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/pcibr/pcibr_config.c 2004-02-17 22:25:53.000000000 -0800 @@ -8,7 +8,6 @@ #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/sn2/pcibr/pcibr_intr.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/pcibr/pcibr_intr.c 2004-02-17 22:25:53.000000000 -0800 @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/sn2/pcibr/pcibr_reg.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/pcibr/pcibr_reg.c 2004-02-17 22:25:53.000000000 -0800 @@ -8,7 +8,6 @@ #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/sn2/pcibr/pcibr_rrb.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/pcibr/pcibr_rrb.c 2004-02-17 22:25:53.000000000 -0800 @@ -8,7 +8,6 @@ #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/sn2/shub.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/shub.c 2004-02-17 22:25:53.000000000 -0800 @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/sn2/shuberror.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/shuberror.c 2004-02-17 22:25:53.000000000 -0800 @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/sn2/shub_intr.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/shub_intr.c 2004-02-17 22:25:53.000000000 -0800 @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/io/xswitch.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/sn/io/xswitch.c 2004-02-17 22:25:53.000000000 -0800 @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.3/arch/ia64/sn/kernel/irq.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/sn/kernel/irq.c 2004-02-17 22:25:53.000000000 -0800 @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -121,7 +120,7 @@ sn_end_irq(unsigned int irq) static void sn_set_affinity_irq(unsigned int irq, unsigned long cpu) { -#if CONFIG_SMP +#ifdef CONFIG_SMP int redir = 0; struct sn_intr_list_t *p = sn_intr_list[irq]; pcibr_intr_t intr; --- linux-2.6.3/arch/ia64/sn/kernel/setup.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ia64/sn/kernel/setup.c 2004-02-17 22:25:49.000000000 -0800 @@ -85,6 +85,7 @@ int numionodes; u64 master_node_bedrock_address; static void sn_init_pdas(char **); +static void scan_for_ionodes(void); static nodepda_t *nodepdaindr[MAX_COMPACT_NODES]; @@ -131,7 +132,7 @@ char drive_info[4*16]; * may not be initialized yet. */ -static int +static int __init pxm_to_nasid(int pxm) { int i; @@ -358,11 +359,10 @@ sn_setup(char **cmdline_p) * * One time setup for Node Data Area. Called by sn_setup(). */ -void +void __init sn_init_pdas(char **cmdline_p) { cnodeid_t cnode; - void scan_for_ionodes(void); /* * Make sure that the PDA fits entirely in the same page as the @@ -498,7 +498,7 @@ sn_cpu_init(void) * physical_node_map and the pda and increment numionodes. */ -void +static void __init scan_for_ionodes(void) { int nasid = 0; --- linux-2.6.3/arch/m68k/amiga/amiints.c 2003-06-14 12:18:52.000000000 -0700 +++ 25/arch/m68k/amiga/amiints.c 2004-02-17 22:25:30.000000000 -0800 @@ -49,6 +49,7 @@ #include #include #include +#include extern int cia_request_irq(struct ciabase *base,int irq, irqreturn_t (*handler)(int, void *, struct pt_regs *), --- linux-2.6.3/arch/m68k/bvme6000/bvmeints.c 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/m68k/bvme6000/bvmeints.c 2004-02-17 22:25:30.000000000 -0800 @@ -20,6 +20,7 @@ #include #include #include +#include static irqreturn_t bvme6000_defhand (int irq, void *dev_id, struct pt_regs *fp); --- linux-2.6.3/arch/m68k/hp300/time.c 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/m68k/hp300/time.c 2004-02-17 22:25:30.000000000 -0800 @@ -17,6 +17,7 @@ #include #include #include +#include #include "ints.h" /* Clock hardware definitions */ --- linux-2.6.3/arch/m68k/Kconfig 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/m68k/Kconfig 2004-02-17 22:25:10.000000000 -0800 @@ -1030,8 +1030,7 @@ config GEN_RTC precision in some cases. To compile this driver as a module, choose M here: the - module will be called genrtc. To load the module automatically - add 'alias char-major-10-135 genrtc' to your /etc/modules.conf + module will be called genrtc. config GEN_RTC_X bool "Extended RTC operation" --- linux-2.6.3/arch/m68k/kernel/signal.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/m68k/kernel/signal.c 2004-02-17 22:25:26.000000000 -0800 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include --- linux-2.6.3/arch/m68k/kernel/sys_m68k.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/m68k/kernel/sys_m68k.c 2004-02-17 22:25:30.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -156,8 +157,6 @@ out: } #endif -extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); - struct sel_arg_struct { unsigned long n; fd_set *inp, *outp, *exp; @@ -262,7 +261,7 @@ asmlinkage int sys_ipc (uint call, int f return -EINVAL; } -asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on) +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on) { return -ENOSYS; } --- linux-2.6.3/arch/m68k/kernel/time.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/m68k/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -174,6 +174,7 @@ int do_settimeofday(struct timespec *tv) time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; write_sequnlock_irq(&xtime_lock); + clock_was_set(); return 0; } --- linux-2.6.3/arch/m68k/mac/iop.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/m68k/mac/iop.c 2004-02-17 22:25:30.000000000 -0800 @@ -118,6 +118,7 @@ #include #include #include +#include /*#define DEBUG_IOP*/ --- linux-2.6.3/arch/m68k/mac/macints.c 2003-06-14 12:18:30.000000000 -0700 +++ 25/arch/m68k/mac/macints.c 2004-02-17 22:25:30.000000000 -0800 @@ -132,8 +132,8 @@ #include #include #include - #include +#include #define DEBUG_SPURIOUS #define SHUTUP_SONIC --- linux-2.6.3/arch/m68k/mac/oss.c 2003-06-14 12:18:20.000000000 -0700 +++ 25/arch/m68k/mac/oss.c 2004-02-17 22:25:30.000000000 -0800 @@ -26,6 +26,7 @@ #include #include #include +#include int oss_present; volatile struct mac_oss *oss; --- linux-2.6.3/arch/m68k/mac/psc.c 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/m68k/mac/psc.c 2004-02-17 22:25:30.000000000 -0800 @@ -24,6 +24,7 @@ #include #include #include +#include #define DEBUG_PSC --- linux-2.6.3/arch/m68k/mac/via.c 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/m68k/mac/via.c 2004-02-17 22:25:30.000000000 -0800 @@ -23,7 +23,6 @@ #include #include #include - #include #include @@ -33,6 +32,7 @@ #include #include #include +#include volatile __u8 *via1, *via2; #if 0 --- linux-2.6.3/arch/m68knommu/kernel/signal.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/m68knommu/kernel/signal.c 2004-02-17 22:25:26.000000000 -0800 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -50,8 +51,6 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -asmlinkage long sys_wait4(pid_t pid, unsigned int * stat_addr, int options, - struct rusage * ru); asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs); /* --- linux-2.6.3/arch/m68knommu/kernel/sys_m68k.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/m68knommu/kernel/sys_m68k.c 2004-02-17 22:25:30.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -111,8 +112,6 @@ out: return error; } -extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); - struct sel_arg_struct { unsigned long n; fd_set *inp, *outp, *exp; @@ -194,7 +193,7 @@ asmlinkage int sys_ipc (uint call, int f return -EINVAL; } -asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on) +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on) { return -ENOSYS; } --- linux-2.6.3/arch/m68knommu/kernel/time.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/m68knommu/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -199,6 +199,7 @@ int do_settimeofday(struct timespec *tv) time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; write_sequnlock_irq(&xtime_lock); + clock_was_set(); return 0; } --- linux-2.6.3/arch/m68k/q40/q40ints.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/m68k/q40/q40ints.c 2004-02-17 22:25:30.000000000 -0800 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include --- linux-2.6.3/arch/m68k/sun3/sun3ints.c 2003-06-14 12:18:51.000000000 -0700 +++ 25/arch/m68k/sun3/sun3ints.c 2004-02-17 22:25:30.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include #include extern void sun3_leds (unsigned char); --- linux-2.6.3/arch/mips/kernel/ioctl32.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/mips/kernel/ioctl32.c 2004-02-17 22:25:22.000000000 -0800 @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -94,8 +95,6 @@ #include #endif -long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); - static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg) { mm_segment_t old_fs = get_fs(); --- linux-2.6.3/arch/mips/kernel/irixioctl.c 2003-07-02 14:53:13.000000000 -0700 +++ 25/arch/mips/kernel/irixioctl.c 2004-02-17 22:25:22.000000000 -0800 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -26,9 +27,6 @@ struct irix_termios { cc_t c_cc[NCCS]; }; -extern asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg); -extern asmlinkage int sys_write(unsigned int fd,char * buf,unsigned int count); extern void start_tty(struct tty_struct *tty); static struct tty_struct *get_tty(int fd) { --- linux-2.6.3/arch/mips/kernel/irq.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/mips/kernel/irq.c 2004-02-17 22:25:41.000000000 -0800 @@ -835,7 +835,7 @@ static cpumask_t irq_affinity [NR_IRQS] static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -873,7 +873,7 @@ static int irq_affinity_write_proc (stru static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3/arch/mips/kernel/linux32.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/mips/kernel/linux32.c 2004-02-17 22:25:25.000000000 -0800 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -123,8 +124,6 @@ out: } -asmlinkage long sys_truncate(const char * path, unsigned long length); - asmlinkage int sys_truncate64(const char *path, unsigned int high, unsigned int low) { @@ -133,8 +132,6 @@ asmlinkage int sys_truncate64(const char return sys_truncate(path, ((long) high << 32) | low); } -asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); - asmlinkage int sys_ftruncate64(unsigned int fd, unsigned int high, unsigned int low) { @@ -375,8 +372,6 @@ xlate_dirent(void *dirent64, void *diren return; } -asmlinkage long sys_getdents(unsigned int fd, void * dirent, unsigned int count); - asmlinkage long sys32_getdents(unsigned int fd, void * dirent32, unsigned int count) { @@ -497,8 +492,6 @@ struct sysinfo32 { char _f[8]; }; -extern asmlinkage int sys_sysinfo(struct sysinfo *info); - asmlinkage int sys32_sysinfo(struct sysinfo32 *info) { struct sysinfo s; @@ -633,10 +626,6 @@ sys32_settimeofday(struct compat_timeval return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } -extern asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, - unsigned long offset_low, loff_t * result, - unsigned int origin); - asmlinkage int sys32_llseek(unsigned int fd, unsigned int offset_high, unsigned int offset_low, loff_t * result, unsigned int origin) @@ -1018,10 +1007,6 @@ out_nofds: } - -extern asmlinkage int sys_sched_rr_get_interval(pid_t pid, - struct timespec *interval); - asmlinkage int sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec *interval) { @@ -1668,9 +1653,6 @@ asmlinkage long sys32_sysctl(struct sysc #endif /* CONFIG_SYSCTL */ -extern asmlinkage int sys_sched_setaffinity(pid_t pid, unsigned int len, - unsigned long *user_mask_ptr); - asmlinkage int sys32_sched_setaffinity(compat_pid_t pid, unsigned int len, u32 *user_mask_ptr) { @@ -1692,9 +1674,6 @@ asmlinkage int sys32_sched_setaffinity(c return ret; } -extern asmlinkage int sys_sched_getaffinity(pid_t pid, unsigned int len, - unsigned long *user_mask_ptr); - asmlinkage int sys32_sched_getaffinity(compat_pid_t pid, unsigned int len, u32 *user_mask_ptr) { @@ -1734,8 +1713,6 @@ asmlinkage long sys32_newuname(struct ne return ret; } -extern asmlinkage long sys_personality(unsigned long); - asmlinkage int sys32_personality(unsigned long personality) { int ret; @@ -1820,8 +1797,6 @@ asmlinkage int sys32_adjtimex(struct tim return ret; } -extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t count); - asmlinkage int sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, s32 count) { @@ -1842,8 +1817,6 @@ asmlinkage int sys32_sendfile(int out_fd return ret; } -asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count); - asmlinkage ssize_t sys32_readahead(int fd, u32 pad0, u64 a2, u64 a3, size_t count) { --- linux-2.6.3/arch/mips/kernel/signal32.c 2003-08-08 22:55:10.000000000 -0700 +++ 25/arch/mips/kernel/signal32.c 2004-02-17 22:25:24.000000000 -0800 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -761,9 +762,6 @@ out: return ret; } -asmlinkage long sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, - size_t sigsetsize); - asmlinkage int sys32_rt_sigprocmask(int how, compat_sigset_t *set, compat_sigset_t *oset, unsigned int sigsetsize) { @@ -785,8 +783,6 @@ asmlinkage int sys32_rt_sigprocmask(int return ret; } -asmlinkage long sys_rt_sigpending(sigset_t *set, size_t sigsetsize); - asmlinkage int sys32_rt_sigpending(compat_sigset_t *uset, unsigned int sigsetsize) { @@ -895,8 +891,6 @@ asmlinkage int sys32_rt_sigtimedwait(com return ret; } -extern asmlinkage int sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo); - asmlinkage int sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo) { siginfo_t info; --- linux-2.6.3/arch/mips/kernel/syscall.c 2003-08-08 22:55:10.000000000 -0700 +++ 25/arch/mips/kernel/syscall.c 2004-02-17 22:25:28.000000000 -0800 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include --- linux-2.6.3/arch/mips/kernel/sysirix.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/mips/kernel/sysirix.c 2004-02-17 22:25:25.000000000 -0800 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -235,13 +236,6 @@ asmlinkage int irix_prctl(struct pt_regs #undef DEBUG_PROCGRPS extern unsigned long irix_mapelf(int fd, struct elf_phdr *user_phdrp, int cnt); -extern asmlinkage int sys_setpgid(pid_t pid, pid_t pgid); -extern void sys_sync(void); -extern asmlinkage int sys_getsid(pid_t pid); -extern asmlinkage long sys_write (unsigned int fd, const char *buf, unsigned long count); -extern asmlinkage long sys_lseek (unsigned int fd, off_t offset, unsigned int origin); -extern asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist); -extern asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist); extern int getrusage(struct task_struct *p, int who, struct rusage *ru); extern char *prom_getenv(char *name); extern long prom_setenv(char *name, char *value); @@ -368,7 +362,7 @@ asmlinkage int irix_syssgi(struct pt_reg retval = HZ; goto out; case 4: - retval = NGROUPS; + retval = NGROUPS_MAX; goto out; case 5: retval = NR_OPEN; @@ -694,9 +688,6 @@ asmlinkage int irix_pause(void) return -EINTR; } -extern asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, - unsigned long new_flags, void * data); - /* XXX need more than this... */ asmlinkage int irix_mount(char *dev_name, char *dir_name, unsigned long flags, char *type, void *data, int datalen) @@ -792,9 +783,6 @@ out: return error; } -extern asmlinkage int sys_setpgid(pid_t pid, pid_t pgid); -extern asmlinkage int sys_setsid(void); - asmlinkage int irix_setpgrp(int flags) { int error; @@ -883,8 +871,6 @@ asmlinkage unsigned long irix_sethostid( return -EINVAL; } -extern asmlinkage int sys_socket(int family, int type, int protocol); - asmlinkage int irix_socket(int family, int type, int protocol) { switch(type) { @@ -1356,8 +1342,6 @@ asmlinkage int irix_fxstat(int version, return error; } -extern asmlinkage int sys_mknod(const char * filename, int mode, unsigned dev); - asmlinkage int irix_xmknod(int ver, char *filename, int mode, unsigned dev) { int retval; @@ -1501,9 +1485,6 @@ asmlinkage int irix_sigqueue(int pid, in return -EINVAL; } -extern asmlinkage int sys_truncate(const char * path, unsigned long length); -extern asmlinkage int sys_ftruncate(unsigned int fd, unsigned long length); - asmlinkage int irix_truncate64(char *name, int pad, int size1, int size2) { int retval; @@ -1532,10 +1513,6 @@ out: return retval; } -extern asmlinkage unsigned long -sys_mmap(unsigned long addr, size_t len, int prot, int flags, int fd, - off_t offset); - asmlinkage int irix_mmap64(struct pt_regs *regs) { int len, prot, flags, fd, off1, off2, error, base = 0; @@ -2106,9 +2083,6 @@ out: #undef DEBUG_FCNTL -extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, - unsigned long arg); - #define IRIX_F_ALLOCSP 10 asmlinkage int irix_fcntl(int fd, int cmd, int arg) --- linux-2.6.3/arch/mips/kernel/time.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/mips/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -132,7 +132,7 @@ int do_settimeofday(struct timespec *tv) time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; write_sequnlock_irq(&xtime_lock); - + clock_was_set(); return 0; } --- linux-2.6.3/arch/parisc/hpux/ioctl.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/parisc/hpux/ioctl.c 2004-02-17 22:25:22.000000000 -0800 @@ -35,13 +35,12 @@ #include #include +#include #include #include #include #include -int sys_ioctl(unsigned int, unsigned int, unsigned long); - static int hpux_ioctl_t(int fd, unsigned long cmd, unsigned long arg) { int result = -EOPNOTSUPP; --- linux-2.6.3/arch/parisc/hpux/sys_hpux.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/parisc/hpux/sys_hpux.c 2004-02-17 22:25:30.000000000 -0800 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -34,8 +35,6 @@ #include #include -unsigned long sys_brk(unsigned long addr); - unsigned long hpux_brk(unsigned long addr) { /* Sigh. Looks like HP/UX libc relies on kernel bugs. */ @@ -61,7 +60,6 @@ int hpux_ptrace(void) int hpux_wait(int *stat_loc) { - extern int sys_waitpid(int, int *, int); return sys_waitpid(-1, stat_loc, 0); } @@ -213,7 +211,6 @@ int hpux_statfs(const char *path, struct kfree(kpath); /* just fake it, beginning of structures match */ - extern int sys_statfs(const char *, struct statfs *); error = sys_statfs(path, (struct statfs *) buf); /* ignoring rest of statfs struct, but it should be zeros. Need to do @@ -269,9 +266,6 @@ static int hpux_uname(struct hpux_utsnam return error; } -int sys_sethostname(char *, int); -int sys_gethostname(char *, int); - /* Note: HP-UX just uses the old suser() function to check perms * in this system call. We'll use capable(CAP_SYS_ADMIN). */ --- linux-2.6.3/arch/parisc/kernel/ioctl32.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/parisc/kernel/ioctl32.c 2004-02-17 22:25:28.000000000 -0800 @@ -8,6 +8,8 @@ * ioctls. */ +#include + #define INCLUDES #include "compat_ioctl.c" --- linux-2.6.3/arch/parisc/kernel/parisc_ksyms.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/parisc/kernel/parisc_ksyms.c 2004-02-17 22:25:25.000000000 -0800 @@ -27,6 +27,7 @@ #include #include #include +#include #include EXPORT_SYMBOL(memchr); @@ -83,10 +84,6 @@ EXPORT_SYMBOL(__memcpy_fromio); EXPORT_SYMBOL(__memset_io); #include -extern long sys_open(const char *, int, int); -extern off_t sys_lseek(int, off_t, int); -extern int sys_read(int, char *, int); -extern int sys_write(int, const char *, int); EXPORT_SYMBOL(sys_open); EXPORT_SYMBOL(sys_lseek); EXPORT_SYMBOL(sys_read); --- linux-2.6.3/arch/parisc/kernel/signal32.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/parisc/kernel/signal32.c 2004-02-17 22:25:24.000000000 -0800 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -92,9 +93,6 @@ get_sigset32(compat_sigset_t *up, sigset int sys32_rt_sigprocmask(int how, compat_sigset_t *set, compat_sigset_t *oset, unsigned int sigsetsize) { - extern long sys_rt_sigprocmask(int how, - sigset_t *set, sigset_t *oset, - size_t sigsetsize); sigset_t old_set, new_set; int ret; @@ -115,7 +113,6 @@ int sys32_rt_sigpending(compat_sigset_t { int ret; sigset_t set; - extern long sys_rt_sigpending(sigset_t *set, size_t sigsetsize); KERNEL_SYSCALL(ret, sys_rt_sigpending, &set, sigsetsize); --- linux-2.6.3/arch/parisc/kernel/sys_parisc32.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/parisc/kernel/sys_parisc32.c 2004-02-17 22:25:24.000000000 -0800 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -357,7 +358,6 @@ asmlinkage long sys32_sched_rr_get_inter { struct timespec t; int ret; - extern asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval); KERNEL_SYSCALL(ret, sys_sched_rr_get_interval, pid, &t); if (put_compat_timespec(&t, interval)) @@ -1089,7 +1089,6 @@ asmlinkage long sys32_msgrcv(int msqid, } -extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t count); asmlinkage int sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, s32 count) { mm_segment_t old_fs = get_fs(); @@ -1197,7 +1196,6 @@ asmlinkage int sys32_nfsservctl(int cmd, return ret; } -extern asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t *offset, size_t count); typedef long __kernel_loff_t32; /* move this to asm/posix_types.h? */ asmlinkage int sys32_sendfile64(int out_fd, int in_fd, __kernel_loff_t32 *offset, s32 count) @@ -1345,8 +1343,6 @@ asmlinkage int sys32_sysinfo(struct sysi * half of the argument has been zeroed by syscall.S. */ -extern asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin); - asmlinkage int sys32_lseek(unsigned int fd, int offset, unsigned int origin) { return sys_lseek(fd, offset, origin); @@ -1367,8 +1363,6 @@ asmlinkage long sys32_semctl(int semid, return sys_semctl (semid, semnum, cmd, arg); } -extern long sys_lookup_dcookie(u64 cookie64, char *buf, size_t len); - long sys32_lookup_dcookie(u32 cookie_high, u32 cookie_low, char *buf, size_t len) { --- linux-2.6.3/arch/parisc/kernel/sys_parisc.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/parisc/kernel/sys_parisc.c 2004-02-17 22:25:30.000000000 -0800 @@ -30,6 +30,7 @@ #include #include #include +#include int sys_pipe(int *fildes) { @@ -182,10 +183,6 @@ long sys_shmat_wrapper(int shmid, char * /* Fucking broken ABI */ #ifdef CONFIG_PARISC64 -extern asmlinkage long sys_truncate(const char *, unsigned long); -extern asmlinkage long sys_ftruncate(unsigned int, unsigned long); -extern asmlinkage long sys_fcntl(unsigned int, unsigned int, unsigned long); - asmlinkage long parisc_truncate64(const char * path, unsigned int high, unsigned int low) { @@ -214,9 +211,6 @@ asmlinkage long sys_fcntl64(unsigned int } #else -extern asmlinkage long sys_truncate64(const char *, loff_t); -extern asmlinkage long sys_ftruncate64(unsigned int, loff_t); - asmlinkage long parisc_truncate64(const char * path, unsigned int high, unsigned int low) { @@ -230,12 +224,6 @@ asmlinkage long parisc_ftruncate64(unsig } #endif -extern asmlinkage ssize_t sys_pread64(unsigned int fd, char *buf, - size_t count, loff_t pos); -extern asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char *buf, - size_t count, loff_t pos); -extern asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count); - asmlinkage ssize_t parisc_pread64(unsigned int fd, char *buf, size_t count, unsigned int high, unsigned int low) { @@ -257,7 +245,7 @@ asmlinkage ssize_t parisc_readahead(int /* * This changes the io permissions bitmap in the current task. */ -asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on) +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) { return -ENOSYS; } --- linux-2.6.3/arch/parisc/kernel/time.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/parisc/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -230,6 +230,7 @@ do_settimeofday (struct timespec *tv) time_esterror = NTP_PHASE_LIMIT; } write_sequnlock_irq(&xtime_lock); + clock_was_set(); return 0; } EXPORT_SYMBOL(do_settimeofday); --- linux-2.6.3/arch/ppc64/kernel/ioctl32.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/ppc64/kernel/ioctl32.c 2004-02-17 22:25:28.000000000 -0800 @@ -23,6 +23,7 @@ #define INCLUDES #include "compat_ioctl.c" #include +#include #include #define CODE --- linux-2.6.3/arch/ppc64/kernel/irq.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/irq.c 2004-02-17 22:25:41.000000000 -0800 @@ -664,7 +664,7 @@ cpumask_t irq_affinity [NR_IRQS] = { [0 static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -702,7 +702,7 @@ static int irq_affinity_write_proc (stru static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3/arch/ppc64/kernel/iSeries_irq.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/iSeries_irq.c 2004-02-17 22:08:49.000000000 -0800 @@ -57,6 +57,7 @@ static hw_irq_controller iSeries_IRQ_han void iSeries_init_irq_desc(irq_desc_t *desc) { + desc->handler = &iSeries_IRQ_handler; } /* This is called by init_IRQ. set in ppc_md.init_IRQ by iSeries_setup.c */ @@ -87,22 +88,6 @@ int __init iSeries_allocate_IRQ(HvBusNum #define IRQ_TO_IDSEL(irq) (((((irq) - 1) >> 3) & 7) + 1) #define IRQ_TO_FUNC(irq) (((irq) - 1) & 7) -/* - * This is called out of iSeries_scan_slot to assign the EADS slot - * to its IRQ number - */ -int __init iSeries_assign_IRQ(int irq, HvBusNumber busNumber, - HvSubBusNumber subBusNumber, HvAgentId deviceId) -{ - irq_desc_t *desc = get_real_irq_desc(irq); - - if (desc == NULL) - return -1; - desc->handler = &iSeries_IRQ_handler; - return 0; -} - - /* This is called by iSeries_activate_IRQs */ static unsigned int iSeries_startup_IRQ(unsigned int irq) { @@ -125,6 +110,11 @@ static unsigned int iSeries_startup_IRQ( } /* + * Temporary hack + */ +#define get_irq_desc(irq) &irq_desc[(irq)] + +/* * This is called out of iSeries_fixup to activate interrupt * generation for usable slots */ --- linux-2.6.3/arch/ppc64/kernel/iSeries_pci.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ppc64/kernel/iSeries_pci.c 2004-02-17 22:08:49.000000000 -0800 @@ -406,7 +406,6 @@ static int iSeries_Scan_Bridge_Slot(HvBu /* iSeries_allocate_IRQ.: 0x18.00.12(0xA3) */ Irq = iSeries_allocate_IRQ(Bus, 0, EADsIdSel); - iSeries_assign_IRQ(Irq, Bus, 0, EADsIdSel); PPCDBG(PPCDBG_BUSWALK, "PCI:- allocate and assign IRQ 0x%02X.%02X.%02X = 0x%02X\n", Bus, 0, EADsIdSel, Irq); --- linux-2.6.3/arch/ppc64/kernel/iSeries_setup.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ppc64/kernel/iSeries_setup.c 2004-02-17 22:08:49.000000000 -0800 @@ -315,7 +315,6 @@ void __init iSeries_init_early(void) ppc_md.setup_residual = iSeries_setup_residual; ppc_md.get_cpuinfo = iSeries_get_cpuinfo; ppc_md.init_IRQ = iSeries_init_IRQ; - ppc_md.init_irq_desc = iSeries_init_irq_desc; ppc_md.get_irq = iSeries_get_irq; ppc_md.init = NULL; --- linux-2.6.3/arch/ppc64/kernel/lparcfg.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ppc64/kernel/lparcfg.c 2004-02-17 22:25:19.000000000 -0800 @@ -134,7 +134,7 @@ static int lparcfg_data(unsigned char *b memset(buf, 0, size); shared = (int)(lpaca->xLpPacaPtr->xSharedProc); - n += snprintf(buf, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf, LPARCFG_BUFF_SIZE - n, "serial_number=%c%c%c%c%c%c%c\n", e2a(xItExtVpdPanel.mfgID[2]), e2a(xItExtVpdPanel.mfgID[3]), @@ -144,7 +144,7 @@ static int lparcfg_data(unsigned char *b e2a(xItExtVpdPanel.systemSerial[4]), e2a(xItExtVpdPanel.systemSerial[5])); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_type=%c%c%c%c\n", e2a(xItExtVpdPanel.machineType[0]), e2a(xItExtVpdPanel.machineType[1]), @@ -152,23 +152,23 @@ static int lparcfg_data(unsigned char *b e2a(xItExtVpdPanel.machineType[3])); lp_index = HvLpConfig_getLpIndex(); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_id=%d\n", (int)lp_index); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_active_processors=%d\n", (int)HvLpConfig_getSystemPhysicalProcessors()); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_potential_processors=%d\n", (int)HvLpConfig_getSystemPhysicalProcessors()); processors = (int)HvLpConfig_getPhysicalProcessors(); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_active_processors=%d\n", processors); max_processors = (int)HvLpConfig_getMaxPhysicalProcessors(); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_potential_processors=%d\n", max_processors); if(shared) { @@ -178,22 +178,22 @@ static int lparcfg_data(unsigned char *b entitled_capacity = processors * 100; max_entitled_capacity = max_processors * 100; } - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_entitled_capacity=%d\n", entitled_capacity); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_max_entitled_capacity=%d\n", max_entitled_capacity); if(shared) { pool_id = HvLpConfig_getSharedPoolIndex(); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, "pool=%d\n", + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "pool=%d\n", (int)pool_id); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "pool_capacity=%d\n", (int)(HvLpConfig_getNumProcsInSharedPool(pool_id)*100)); } - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "shared_processor_mode=%d\n", shared); return 0; @@ -297,13 +297,13 @@ static int lparcfg_data(unsigned char *b if(lp_index_ptr) lp_index = *lp_index_ptr; } - n = snprintf(buf, LPARCFG_BUFF_SIZE - n, + n = scnprintf(buf, LPARCFG_BUFF_SIZE - n, "serial_number=%s\n", system_id); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_type=%s\n", model); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_id=%d\n", (int)lp_index); rtas_node = find_path_device("/rtas"); @@ -317,74 +317,74 @@ static int lparcfg_data(unsigned char *b if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { h_get_ppp(&h_entitled,&h_unallocated,&h_aggregation,&h_resource); #ifdef DEBUG - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "R4=0x%lx\n", h_entitled); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "R5=0x%lx\n", h_unallocated); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "R6=0x%lx\n", h_aggregation); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "R7=0x%lx\n", h_resource); #endif /* DEBUG */ } if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { system_potential_processors = get_splpar_potential_characteristics(); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_active_processors=%ld\n", (h_resource >> 2*8) & 0xffff); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_potential_processors=%d\n", system_potential_processors); } else { system_potential_processors = system_active_processors; - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_active_processors=%d\n", system_active_processors); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_potential_processors=%d\n", system_potential_processors); } processors = systemcfg->processorCount; - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_active_processors=%d\n", processors); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_potential_processors=%d\n", system_active_processors); /* max_entitled_capacity will come out of get_splpar_potential_characteristics() when that function is complete */ max_entitled_capacity = system_active_processors * 100; if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_entitled_capacity=%ld\n", h_entitled); } else { - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_entitled_capacity=%d\n", system_active_processors*100); } - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_max_entitled_capacity=%d\n", max_entitled_capacity); shared = 0; - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "shared_processor_mode=%d\n", shared); if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "pool=%ld\n", (h_aggregation >> 0*8)&0xffff); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "pool_capacity=%ld\n", (h_resource >> 3*8) &0xffff); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "group=%ld\n", (h_aggregation >> 2*8)&0xffff); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "capped=%ld\n", (h_resource >> 6*8)&0x40); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "capacity_weight=%d\n", (int)(h_resource>>5*8)&0xFF); } return 0; --- linux-2.6.3/arch/ppc64/kernel/ppc_ksyms.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ppc64/kernel/ppc_ksyms.c 2004-02-17 22:25:22.000000000 -0800 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -48,7 +49,6 @@ #include #endif -extern int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); extern int do_signal(sigset_t *, struct pt_regs *); int abs(int); --- linux-2.6.3/arch/ppc64/kernel/prom.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ppc64/kernel/prom.c 2004-02-17 22:08:48.000000000 -0800 @@ -186,12 +186,13 @@ extern void copy_and_flush(unsigned long extern char cmd_line[512]; /* XXX */ unsigned long dev_tree_size; +unsigned long _get_PIR(void); #ifdef CONFIG_HMT struct { unsigned int pir; unsigned int threadid; -} hmt_thread_data[NR_CPUS] = {0}; +} hmt_thread_data[NR_CPUS]; #endif /* CONFIG_HMT */ char testString[] = "LINUX\n"; --- linux-2.6.3/arch/ppc64/kernel/rtasd.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ppc64/kernel/rtasd.c 2004-02-17 22:25:24.000000000 -0800 @@ -336,8 +336,6 @@ static int get_eventscan_parms(void) return 0; } -extern long sys_sched_get_priority_max(int policy); - static int rtasd(void *unused) { unsigned int err_type; --- linux-2.6.3/arch/ppc64/kernel/rtas-proc.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ppc64/kernel/rtas-proc.c 2004-02-17 22:25:19.000000000 -0800 @@ -287,9 +287,9 @@ static ssize_t ppc_rtas_poweron_read(str char stkbuf[40]; /* its small, its on stack */ int n, sn; if (power_on_time == 0) - n = snprintf(stkbuf, 40, "Power on time not set\n"); + n = scnprintf(stkbuf,sizeof(stkbuf),"Power on time not set\n"); else - n = snprintf(stkbuf, 40, "%lu\n", power_on_time); + n = scnprintf(stkbuf,sizeof(stkbuf),"%lu\n",power_on_time); sn = strlen (stkbuf) +1; if (*ppos >= sn) @@ -410,9 +410,10 @@ static ssize_t ppc_rtas_clock_read(struc if (error != 0){ printk(KERN_WARNING "error: reading the clock returned: %s\n", ppc_rtas_process_error(error)); - n = snprintf (stkbuf, 40, "0"); + n = scnprintf (stkbuf, sizeof(stkbuf), "0"); } else { - n = snprintf (stkbuf, 40, "%lu\n", mktime(year, mon, day, hour, min, sec)); + n = scnprintf (stkbuf, sizeof(stkbuf), "%lu\n", + mktime(year, mon, day, hour, min, sec)); } kfree(ret); @@ -819,7 +820,7 @@ int get_location_code(struct individual_ n += check_location_string(ret, buffer + n); n += sprintf ( buffer+n, " "); /* see how many characters we have printed */ - snprintf ( t, 50, "%s ", ret); + scnprintf(t, sizeof(t), "%s ", ret); pos += strlen(t); if (pos >= llen) pos=0; @@ -863,7 +864,7 @@ static ssize_t ppc_rtas_tone_freq_read(s int n, sn; char stkbuf[40]; /* its small, its on stack */ - n = snprintf(stkbuf, 40, "%lu\n", rtas_tone_frequency); + n = scnprintf(stkbuf, 40, "%lu\n", rtas_tone_frequency); sn = strlen (stkbuf) +1; if (*ppos >= sn) @@ -917,7 +918,7 @@ static ssize_t ppc_rtas_tone_volume_read int n, sn; char stkbuf[40]; /* its small, its on stack */ - n = snprintf(stkbuf, 40, "%lu\n", rtas_tone_volume); + n = scnprintf(stkbuf, 40, "%lu\n", rtas_tone_volume); sn = strlen (stkbuf) +1; if (*ppos >= sn) --- linux-2.6.3/arch/ppc64/kernel/setup.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ppc64/kernel/setup.c 2004-02-17 22:08:48.000000000 -0800 @@ -80,7 +80,7 @@ unsigned long decr_overclock_proc0_set = int powersave_nap; -char saved_command_line[256]; +char saved_command_line[COMMAND_LINE_SIZE]; unsigned char aux_device_present; void parse_cmd_line(unsigned long r3, unsigned long r4, unsigned long r5, @@ -536,7 +536,7 @@ int parse_bootinfo(void) for ( ; rec->tag != BI_LAST ; rec = bi_rec_next(rec) ) { switch (rec->tag) { case BI_CMD_LINE: - memcpy(cmd_line, (void *)rec->data, rec->size); + strlcpy(cmd_line, (void *)rec->data, sizeof(cmd_line)); break; case BI_SYSMAP: sysmap = __va(rec->data[0]); @@ -620,7 +620,7 @@ void __init setup_arch(char **cmdline_p) init_mm.brk = klimit; /* Save unparsed command line copy for /proc/cmdline */ - strcpy(saved_command_line, cmd_line); + strlcpy(saved_command_line, cmd_line, sizeof(saved_command_line)); *cmdline_p = cmd_line; /* set up the bootmem stuff with available memory */ --- linux-2.6.3/arch/ppc64/kernel/signal32.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/ppc64/kernel/signal32.c 2004-02-17 22:25:30.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include --- linux-2.6.3/arch/ppc64/kernel/syscalls.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/ppc64/kernel/syscalls.c 2004-02-17 22:25:28.000000000 -0800 @@ -22,6 +22,7 @@ #include #include +#include #include #include #include --- linux-2.6.3/arch/ppc64/kernel/sys_ppc32.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ppc64/kernel/sys_ppc32.c 2004-02-17 22:25:30.000000000 -0800 @@ -54,6 +54,8 @@ #include #include #include +#include +#include #include #include #include @@ -65,6 +67,7 @@ #include #include #include +#include #include @@ -778,8 +781,6 @@ int cp_compat_stat(struct kstat *stat, s return err; } -extern asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2); - /* Note: it is necessary to treat option as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -1155,8 +1156,6 @@ struct sysinfo32 { char _f[20-2*sizeof(int)-sizeof(int)]; }; -extern asmlinkage long sys_sysinfo(struct sysinfo *info); - asmlinkage long sys32_sysinfo(struct sysinfo32 *info) { struct sysinfo s; @@ -1868,8 +1867,6 @@ asmlinkage long sys32_ipc(u32 call, u32 return err; } -extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t* offset, size_t count); - /* Note: it is necessary to treat out_fd and in_fd as unsigned ints, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -1894,8 +1891,6 @@ asmlinkage long sys32_sendfile(u32 out_f return ret; } -extern asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t *offset, size_t count); - asmlinkage int sys32_sendfile64(int out_fd, int in_fd, compat_loff_t *offset, s32 count) { mm_segment_t old_fs = get_fs(); @@ -2158,9 +2153,6 @@ void start_thread32(struct pt_regs* regs #endif /* CONFIG_ALTIVEC */ } -extern asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); - /* Note: it is necessary to treat option as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2175,14 +2167,12 @@ asmlinkage long sys32_prctl(u32 option, (unsigned long) arg5); } -extern asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval); - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) * and the register representation of a signed int (msr in 64-bit mode) is performed. */ -asmlinkage int sys32_sched_rr_get_interval(u32 pid, struct compat_timespec *interval) +asmlinkage long sys32_sched_rr_get_interval(u32 pid, struct compat_timespec *interval) { struct timespec t; int ret; @@ -2196,9 +2186,6 @@ asmlinkage int sys32_sched_rr_get_interv return ret; } -extern asmlinkage int sys_pciconfig_read(unsigned long bus, unsigned long dfn, unsigned long off, - unsigned long len, unsigned char *buf); - asmlinkage int sys32_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf) { return sys_pciconfig_read((unsigned long) bus, @@ -2208,12 +2195,6 @@ asmlinkage int sys32_pciconfig_read(u32 (unsigned char *)AA(ubuf)); } - - - -extern asmlinkage int sys_pciconfig_write(unsigned long bus, unsigned long dfn, unsigned long off, - unsigned long len, unsigned char *buf); - asmlinkage int sys32_pciconfig_write(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf) { return sys_pciconfig_write((unsigned long) bus, @@ -2281,8 +2262,6 @@ asmlinkage int sys32_pciconfig_iobase(u3 } -extern asmlinkage int sys_newuname(struct new_utsname * name); - asmlinkage int ppc64_newuname(struct new_utsname * name) { int errno = sys_newuname(name); @@ -2295,8 +2274,6 @@ asmlinkage int ppc64_newuname(struct new return errno; } -extern asmlinkage long sys_personality(unsigned long); - asmlinkage int ppc64_personality(unsigned long personality) { int ret; @@ -2310,8 +2287,6 @@ asmlinkage int ppc64_personality(unsigne -extern asmlinkage long sys_access(const char * filename, int mode); - /* Note: it is necessary to treat mode as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2323,8 +2298,6 @@ asmlinkage long sys32_access(const char } -extern asmlinkage long sys_creat(const char * pathname, int mode); - /* Note: it is necessary to treat mode as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2336,8 +2309,6 @@ asmlinkage long sys32_creat(const char * } -extern asmlinkage long sys_waitpid(pid_t pid, unsigned int * stat_addr, int options); - /* Note: it is necessary to treat pid and options as unsigned ints, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2349,8 +2320,6 @@ asmlinkage long sys32_waitpid(u32 pid, u } -extern asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist); - /* Note: it is necessary to treat gidsetsize as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2362,8 +2331,6 @@ asmlinkage long sys32_getgroups(u32 gids } -extern asmlinkage long sys_getpgid(pid_t pid); - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2375,8 +2342,6 @@ asmlinkage long sys32_getpgid(u32 pid) } -extern asmlinkage long sys_getpriority(int which, int who); - /* Note: it is necessary to treat which and who as unsigned ints, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2388,8 +2353,6 @@ asmlinkage long sys32_getpriority(u32 wh } -extern asmlinkage long sys_getsid(pid_t pid); - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2400,7 +2363,6 @@ asmlinkage long sys32_getsid(u32 pid) return sys_getsid((int)pid); } -extern asmlinkage long sys_kill(int pid, int sig); /* Note: it is necessary to treat pid and sig as unsigned ints, * with the corresponding cast to a signed int to insure that the @@ -2413,8 +2375,6 @@ asmlinkage long sys32_kill(u32 pid, u32 } -extern asmlinkage long sys_mkdir(const char * pathname, int mode); - /* Note: it is necessary to treat mode as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2425,16 +2385,12 @@ asmlinkage long sys32_mkdir(const char * return sys_mkdir(pathname, (int)mode); } -extern asmlinkage long sys_nice(int increment); - long sys32_nice(u32 increment) { /* sign extend increment */ return sys_nice((int)increment); } -extern off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin); - off_t ppc32_lseek(unsigned int fd, u32 offset, unsigned int origin) { /* sign extend n */ @@ -2472,8 +2428,6 @@ out_error: goto out; } -extern asmlinkage long sys_readlink(const char * path, char * buf, int bufsiz); - /* Note: it is necessary to treat bufsiz as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2484,8 +2438,6 @@ asmlinkage long sys32_readlink(const cha return sys_readlink(path, buf, (int)bufsiz); } -extern asmlinkage long sys_sched_get_priority_max(int policy); - /* Note: it is necessary to treat option as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2497,8 +2449,6 @@ asmlinkage long sys32_sched_get_priority } -extern asmlinkage long sys_sched_get_priority_min(int policy); - /* Note: it is necessary to treat policy as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2510,8 +2460,6 @@ asmlinkage long sys32_sched_get_priority } -extern asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param); - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2523,8 +2471,6 @@ asmlinkage long sys32_sched_getparam(u32 } -extern asmlinkage long sys_sched_getscheduler(pid_t pid); - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2536,8 +2482,6 @@ asmlinkage long sys32_sched_getscheduler } -extern asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param); - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2549,8 +2493,6 @@ asmlinkage long sys32_sched_setparam(u32 } -extern asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, struct sched_param *param); - /* Note: it is necessary to treat pid and policy as unsigned ints, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2562,8 +2504,6 @@ asmlinkage long sys32_sched_setscheduler } -extern asmlinkage long sys_setdomainname(char *name, int len); - /* Note: it is necessary to treat len as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2575,8 +2515,6 @@ asmlinkage long sys32_setdomainname(char } -extern asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist); - /* Note: it is necessary to treat gidsetsize as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2588,8 +2526,6 @@ asmlinkage long sys32_setgroups(u32 gids } -extern asmlinkage long sys_sethostname(char *name, int len); - asmlinkage long sys32_sethostname(char *name, u32 len) { /* sign extend len */ @@ -2597,8 +2533,6 @@ asmlinkage long sys32_sethostname(char * } -extern asmlinkage long sys_setpgid(pid_t pid, pid_t pgid); - /* Note: it is necessary to treat pid and pgid as unsigned ints, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2610,16 +2544,12 @@ asmlinkage long sys32_setpgid(u32 pid, u } -extern asmlinkage long sys_setpriority(int which, int who, int niceval); - long sys32_setpriority(u32 which, u32 who, u32 niceval) { /* sign extend which, who and niceval */ return sys_setpriority((int)which, (int)who, (int)niceval); } -extern asmlinkage long sys_ssetmask(int newmask); - /* Note: it is necessary to treat newmask as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2630,8 +2560,6 @@ asmlinkage long sys32_ssetmask(u32 newma return sys_ssetmask((int) newmask); } -extern asmlinkage long sys_syslog(int type, char * buf, int len); - long sys32_syslog(u32 type, char * buf, u32 len) { /* sign extend len */ @@ -2639,8 +2567,6 @@ long sys32_syslog(u32 type, char * buf, } -extern asmlinkage long sys_umask(int mask); - /* Note: it is necessary to treat mask as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2652,8 +2578,6 @@ asmlinkage long sys32_umask(u32 mask) } -extern asmlinkage long sys_umount(char * name, int flags); - /* Note: it is necessary to treat flags as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2756,10 +2680,6 @@ int sys32_olduname(struct oldold_utsname return error; } -extern unsigned long sys_mmap(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, off_t offset); - unsigned long sys32_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) @@ -2801,8 +2721,6 @@ long sys32_utimes(char *filename, struct return ret; } -extern long sys_tgkill(int tgid, int pid, int sig); - long sys32_tgkill(u32 tgid, u32 pid, int sig) { /* sign extend tgid, pid */ @@ -2813,11 +2731,6 @@ long sys32_tgkill(u32 tgid, u32 pid, int * long long munging: * The 32 bit ABI passes long longs in an odd even register pair. */ -extern ssize_t sys_pread64(unsigned int fd, char *buf, size_t count, - loff_t pos); - -extern ssize_t sys_pwrite64(unsigned int fd, const char *buf, size_t count, - loff_t pos); compat_ssize_t sys32_pread64(unsigned int fd, char *ubuf, compat_size_t count, u32 reg6, u32 poshi, u32 poslo) @@ -2831,16 +2744,11 @@ compat_ssize_t sys32_pwrite64(unsigned i return sys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); } -extern ssize_t sys_readahead(int fd, loff_t offset, size_t count); - compat_ssize_t sys32_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count) { return sys_readahead(fd, ((loff_t)offhi << 32) | offlo, count); } -extern asmlinkage long sys_truncate(const char * path, unsigned long length); -extern asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); - asmlinkage int sys32_truncate64(const char * path, u32 reg4, unsigned long high, unsigned long low) { @@ -2853,8 +2761,6 @@ asmlinkage int sys32_ftruncate64(unsigne return sys_ftruncate(fd, (high << 32) | low); } -extern long sys_lookup_dcookie(u64 cookie64, char *buf, size_t len); - long ppc32_lookup_dcookie(u32 cookie_high, u32 cookie_low, char *buf, size_t len) { @@ -2862,8 +2768,6 @@ long ppc32_lookup_dcookie(u32 cookie_hig buf, len); } -extern int sys_fadvise64(int fd, loff_t offset, size_t len, int advice); - long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low, size_t len, int advice) { --- linux-2.6.3/arch/ppc64/kernel/time.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ppc64/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -418,6 +418,7 @@ int do_settimeofday(struct timespec *tv) } write_sequnlock_irqrestore(&xtime_lock, flags); + clock_was_set(); return 0; } --- linux-2.6.3/arch/ppc/8260_io/enet.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/ppc/8260_io/enet.c 2004-02-17 22:06:13.000000000 -0800 @@ -851,7 +851,7 @@ static int __init scc_enet_init(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.3/arch/ppc/8260_io/fcc_enet.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/ppc/8260_io/fcc_enet.c 2004-02-17 22:06:13.000000000 -0800 @@ -1371,7 +1371,7 @@ static int __init fec_enet_init(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.3/arch/ppc/8xx_io/enet.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/ppc/8xx_io/enet.c 2004-02-17 22:06:13.000000000 -0800 @@ -949,7 +949,7 @@ static int __init scc_enet_init(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.3/arch/ppc/8xx_io/fec.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/ppc/8xx_io/fec.c 2004-02-17 22:06:13.000000000 -0800 @@ -1748,7 +1748,7 @@ static int __init fec_enet_init(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.3/arch/ppc/boot/ld.script 2003-11-09 16:45:05.000000000 -0800 +++ 25/arch/ppc/boot/ld.script 2004-02-17 22:26:09.000000000 -0800 @@ -82,6 +82,7 @@ SECTIONS *(__ksymtab) *(__ksymtab_strings) *(__bug_table) + *(__kcrctab) } } --- linux-2.6.3/arch/ppc/kernel/irq.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/ppc/kernel/irq.c 2004-02-17 22:25:41.000000000 -0800 @@ -575,7 +575,7 @@ cpumask_t irq_affinity [NR_IRQS]; static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -616,7 +616,7 @@ static int irq_affinity_write_proc (stru static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3/arch/ppc/kernel/setup.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ppc/kernel/setup.c 2004-02-17 22:08:48.000000000 -0800 @@ -53,7 +53,7 @@ extern void ppc6xx_idle(void); extern void power4_idle(void); extern boot_infos_t *boot_infos; -char saved_command_line[256]; +char saved_command_line[COMMAND_LINE_SIZE]; unsigned char aux_device_present; struct ide_machdep_calls ppc_ide_md; char *sysmap; @@ -501,7 +501,7 @@ void parse_bootinfo(struct bi_record *re ulong *data = rec->data; switch (rec->tag) { case BI_CMD_LINE: - memcpy(cmd_line, (void *)data, rec->size); + strlcpy(cmd_line, (void *)data, sizeof(cmd_line)); break; case BI_SYSMAP: sysmap = (char *)((data[0] >= (KERNELBASE)) ? data[0] : @@ -538,7 +538,7 @@ machine_init(unsigned long r3, unsigned unsigned long r6, unsigned long r7) { #ifdef CONFIG_CMDLINE - strcpy(cmd_line, CONFIG_CMDLINE); + strlcpy(cmd_line, CONFIG_CMDLINE, sizeof(cmd_line)); #endif /* CONFIG_CMDLINE */ #ifdef CONFIG_6xx @@ -676,7 +676,7 @@ void __init setup_arch(char **cmdline_p) init_mm.brk = (unsigned long) klimit; /* Save unparsed command line copy for /proc/cmdline */ - strcpy(saved_command_line, cmd_line); + strlcpy(saved_command_line, cmd_line, sizeof(saved_command_line)); *cmdline_p = cmd_line; /* set up the bootmem stuff with available memory */ --- linux-2.6.3/arch/ppc/kernel/syscalls.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc/kernel/syscalls.c 2004-02-17 22:25:25.000000000 -0800 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -200,8 +201,6 @@ out: return err; } -extern int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); - /* * Due to some executables calling the wrong select we sometimes * get wrong args. This determines how the args are being passed --- linux-2.6.3/arch/ppc/kernel/time.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/ppc/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -294,6 +294,7 @@ int do_settimeofday(struct timespec *tv) time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; write_sequnlock_irqrestore(&xtime_lock, flags); + clock_was_set(); return 0; } --- linux-2.6.3/arch/s390/kernel/compat_linux.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/s390/kernel/compat_linux.c 2004-02-17 22:25:26.000000000 -0800 @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -71,17 +72,6 @@ #include "compat_linux.h" -extern asmlinkage long sys_chown(const char *, uid_t,gid_t); -extern asmlinkage long sys_lchown(const char *, uid_t,gid_t); -extern asmlinkage long sys_fchown(unsigned int, uid_t,gid_t); -extern asmlinkage long sys_setregid(gid_t, gid_t); -extern asmlinkage long sys_setgid(gid_t); -extern asmlinkage long sys_setreuid(uid_t, uid_t); -extern asmlinkage long sys_setuid(uid_t); -extern asmlinkage long sys_setresuid(uid_t, uid_t, uid_t); -extern asmlinkage long sys_setresgid(gid_t, gid_t, gid_t); -extern asmlinkage long sys_setfsuid(uid_t); -extern asmlinkage long sys_setfsgid(gid_t); /* For this source file, we want overflow handling. */ @@ -190,40 +180,81 @@ asmlinkage long sys32_setfsgid16(u16 gid return sys_setfsgid((gid_t)gid); } +static int groups16_to_user(u16 *grouplist, struct group_info *group_info) +{ + int i; + u16 group; + + for (i = 0; i < group_info->ngroups; i++) { + group = (u16)GROUP_AT(group_info, i); + if (put_user(group, grouplist+i)) + return -EFAULT; + } + + return 0; +} + +static int groups16_from_user(struct group_info *group_info, u16 *grouplist) +{ + int i; + u16 group; + + for (i = 0; i < group_info->ngroups; i++) { + if (get_user(group, grouplist+i)) + return -EFAULT; + GROUP_AT(group_info, i) = (gid_t)group; + } + + return 0; +} + asmlinkage long sys32_getgroups16(int gidsetsize, u16 *grouplist) { - u16 groups[NGROUPS]; - int i,j; + int i; if (gidsetsize < 0) return -EINVAL; - i = current->ngroups; + + get_group_info(current->group_info); + i = current->group_info->ngroups; if (gidsetsize) { - if (i > gidsetsize) - return -EINVAL; - for(j=0;jgroups[j]; - if (copy_to_user(grouplist, groups, sizeof(u16)*i)) - return -EFAULT; + if (i > gidsetsize) { + i = -EINVAL; + goto out; + } + if (groups16_to_user(grouplist, current->group_info)) { + i = -EFAULT; + goto out; + } } +out: + put_group_info(current->group_info); return i; } asmlinkage long sys32_setgroups16(int gidsetsize, u16 *grouplist) { - u16 groups[NGROUPS]; - int i; + struct group_info *group_info; + int retval; if (!capable(CAP_SETGID)) return -EPERM; - if ((unsigned) gidsetsize > NGROUPS) + if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; - if (copy_from_user(groups, grouplist, gidsetsize * sizeof(u16))) - return -EFAULT; - for (i = 0 ; i < gidsetsize ; i++) - current->groups[i] = (gid_t)groups[i]; - current->ngroups = gidsetsize; - return 0; + + group_info = groups_alloc(gidsetsize); + if (!group_info) + return -ENOMEM; + retval = groups16_from_user(group_info, grouplist); + if (retval) { + put_group_info(group_info); + return retval; + } + + retval = set_current_groups(group_info); + put_group_info(group_info); + + return retval; } asmlinkage long sys32_getuid16(void) @@ -884,9 +915,6 @@ out: return err; } -extern asmlinkage long sys_truncate(const char * path, unsigned long length); -extern asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); - asmlinkage int sys32_truncate64(const char * path, unsigned long high, unsigned long low) { if ((int)high < 0) @@ -1356,8 +1384,6 @@ int cp_compat_stat(struct kstat *stat, s return err; } -extern asmlinkage int sys_sysfs(int option, unsigned long arg1, unsigned long arg2); - asmlinkage int sys32_sysfs(int option, u32 arg1, u32 arg2) { return sys_sysfs(option, arg1, arg2); @@ -1531,9 +1557,7 @@ struct sysinfo32 { char _f[8]; }; -extern asmlinkage int sys_sysinfo(struct sysinfo *info); - -asmlinkage int sys32_sysinfo(struct sysinfo32 *info) +asmlinkage int sys32_sysinfo(struct sysinfo32 __user *info) { struct sysinfo s; int ret, err; @@ -1561,10 +1585,8 @@ asmlinkage int sys32_sysinfo(struct sysi return ret; } -extern asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval); - asmlinkage int sys32_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec *interval) + struct compat_timespec __user *interval) { struct timespec t; int ret; @@ -1578,9 +1600,8 @@ asmlinkage int sys32_sched_rr_get_interv return ret; } -extern asmlinkage int sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, size_t sigsetsize); - -asmlinkage int sys32_rt_sigprocmask(int how, compat_sigset_t *set, compat_sigset_t *oset, compat_size_t sigsetsize) +asmlinkage int sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, compat_size_t sigsetsize) { sigset_t s; compat_sigset_t s32; @@ -1614,9 +1635,8 @@ asmlinkage int sys32_rt_sigprocmask(int return 0; } -extern asmlinkage int sys_rt_sigpending(sigset_t *set, size_t sigsetsize); - -asmlinkage int sys32_rt_sigpending(compat_sigset_t *set, compat_size_t sigsetsize) +asmlinkage int sys32_rt_sigpending(compat_sigset_t __user *set, + compat_size_t sigsetsize) { sigset_t s; compat_sigset_t s32; @@ -1723,11 +1743,8 @@ sys32_rt_sigtimedwait(compat_sigset_t *u return ret; } -extern asmlinkage int -sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo); - asmlinkage int -sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo) +sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 __user *uinfo) { siginfo_t info; int ret; @@ -1956,40 +1973,30 @@ out: #ifdef CONFIG_MODULES -extern asmlinkage int sys_init_module(const char *name_user, struct module *mod_user); - -/* Hey, when you're trying to init module, take time and prepare us a nice 64bit - * module structure, even if from 32bit modutils... Why to pollute kernel... :)) - */ -asmlinkage int sys32_init_module(const char *name_user, struct module *mod_user) +asmlinkage int +sys32_init_module(void __user *umod, unsigned long len, + const char __user *uargs) { - return sys_init_module(name_user, mod_user); + return sys_init_module(umod, len, uargs); } -extern asmlinkage int sys_delete_module(const char *name_user); - -asmlinkage int sys32_delete_module(const char *name_user) +asmlinkage int +sys32_delete_module(const char __user *name_user, unsigned int flags) { - return sys_delete_module(name_user); + return sys_delete_module(name_user, flags); } -struct module_info32 { - u32 addr; - u32 size; - u32 flags; - s32 usecount; -}; - #else /* CONFIG_MODULES */ asmlinkage int -sys32_init_module(const char *name_user, struct module *mod_user) +sys32_init_module(void __user *umod, unsigned long len, + const char __user *uargs) { return -ENOSYS; } asmlinkage int -sys32_delete_module(const char *name_user) +sys32_delete_module(const char __user *name_user, unsigned int flags) { return -ENOSYS; } @@ -2153,10 +2160,6 @@ static int nfs_getfh32_res_trans(union n return copy_to_user(res32, kres, sizeof(*res32)) ? -EFAULT : 0; } -/* -asmlinkage long sys_ni_syscall(void); -*/ - int asmlinkage sys32_nfsservctl(int cmd, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32) { struct nfsctl_arg *karg = NULL; @@ -2271,9 +2274,8 @@ asmlinkage int sys32_settimeofday(struct return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } -asmlinkage int sys_utimes(char *, struct timeval *); - -asmlinkage int sys32_utimes(char *filename, struct compat_timeval *tvs) +asmlinkage int sys32_utimes(char __user *filename, + struct compat_timeval __user *tvs) { char *kfilename; struct timeval ktvs[2]; @@ -2307,8 +2309,6 @@ asmlinkage int sys32_pause(void) return -ERESTARTNOHAND; } -extern asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); asmlinkage int sys32_prctl(int option, u32 arg2, u32 arg3, u32 arg4, u32 arg5) { @@ -2320,12 +2320,6 @@ asmlinkage int sys32_prctl(int option, u } -extern asmlinkage ssize_t sys_pread64(unsigned int fd, char * buf, - size_t count, loff_t pos); - -extern asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char * buf, - size_t count, loff_t pos); - asmlinkage compat_ssize_t sys32_pread64(unsigned int fd, char *ubuf, compat_size_t count, u32 poshi, u32 poslo) { @@ -2342,15 +2336,11 @@ asmlinkage compat_ssize_t sys32_pwrite64 return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -extern asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count); - asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count) { return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count); } -extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t count); - asmlinkage int sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, s32 count) { mm_segment_t old_fs = get_fs(); @@ -2370,9 +2360,6 @@ asmlinkage int sys32_sendfile(int out_fd return ret; } -extern asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, - loff_t *offset, size_t count); - asmlinkage int sys32_sendfile64(int out_fd, int in_fd, compat_loff_t *offset, s32 count) { @@ -2466,8 +2453,6 @@ asmlinkage int sys32_adjtimex(struct tim return ret; } -extern asmlinkage long sys_setpriority(int which, int who, int niceval); - asmlinkage int sys_setpriority32(u32 which, u32 who, u32 niceval) { return sys_setpriority((int) which, @@ -2485,7 +2470,7 @@ struct __sysctl_args32 { u32 __unused[4]; }; -extern asmlinkage long sys32_sysctl(struct __sysctl_args32 *args) +asmlinkage long sys32_sysctl(struct __sysctl_args32 *args) { struct __sysctl_args32 tmp; int error; @@ -2677,8 +2662,6 @@ out: return error; } -asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count); - asmlinkage compat_ssize_t sys32_read(unsigned int fd, char * buf, size_t count) { if ((compat_ssize_t) count < 0) @@ -2687,8 +2670,6 @@ asmlinkage compat_ssize_t sys32_read(uns return sys_read(fd, buf, count); } -asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count); - asmlinkage compat_ssize_t sys32_write(unsigned int fd, char * buf, size_t count) { if ((compat_ssize_t) count < 0) --- linux-2.6.3/arch/s390/kernel/compat_linux.h 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/s390/kernel/compat_linux.h 2004-02-17 22:25:22.000000000 -0800 @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include --- linux-2.6.3/arch/s390/kernel/compat_wrapper.S 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/s390/kernel/compat_wrapper.S 2004-02-17 22:25:26.000000000 -0800 @@ -567,13 +567,15 @@ compat_sys_sigprocmask_wrapper: .globl sys32_init_module_wrapper sys32_init_module_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # struct module * + llgtr %r2,%r2 # void * + llgfr %r3,%r3 # unsigned long + llgtr %r4,%r4 # char * jg sys32_init_module # branch to system call .globl sys32_delete_module_wrapper sys32_delete_module_wrapper: llgtr %r2,%r2 # const char * + llgfr %r3,%r3 # unsigned int jg sys32_delete_module # branch to system call .globl sys32_quotactl_wrapper --- linux-2.6.3/arch/s390/kernel/s390_ksyms.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/s390/kernel/s390_ksyms.c 2004-02-17 22:25:28.000000000 -0800 @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include --- linux-2.6.3/arch/s390/kernel/sys_s390.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/s390/kernel/sys_s390.c 2004-02-17 22:25:30.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -125,8 +126,6 @@ out: return error; } -extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); - #ifndef CONFIG_ARCH_S390X struct sel_arg_struct { unsigned long n; @@ -290,15 +289,13 @@ asmlinkage int sys_olduname(struct oldol return error; } -asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on) +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on) { return -ENOSYS; } #else /* CONFIG_ARCH_S390X */ -extern asmlinkage int sys_newuname(struct new_utsname * name); - asmlinkage int s390x_newuname(struct new_utsname * name) { int ret = sys_newuname(name); @@ -310,8 +307,6 @@ asmlinkage int s390x_newuname(struct new return ret; } -extern asmlinkage long sys_personality(unsigned long); - asmlinkage int s390x_personality(unsigned long personality) { int ret; @@ -331,8 +326,6 @@ asmlinkage int s390x_personality(unsigne */ #ifndef CONFIG_ARCH_S390X -extern asmlinkage long sys_fadvise64(int, loff_t, size_t, int); - asmlinkage long s390_fadvise64(int fd, u32 offset_high, u32 offset_low, size_t len, int advice) { @@ -342,8 +335,6 @@ s390_fadvise64(int fd, u32 offset_high, #endif -extern asmlinkage long sys_fadvise64_64(int, loff_t, loff_t, int); - struct fadvise64_64_args { int fd; long long offset; --- linux-2.6.3/arch/s390/kernel/time.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/s390/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -142,6 +142,7 @@ int do_settimeofday(struct timespec *tv) time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; write_sequnlock_irq(&xtime_lock); + clock_was_set(); return 0; } --- linux-2.6.3/arch/sh/kernel/sys_sh.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/sh/kernel/sys_sh.c 2004-02-17 22:25:25.000000000 -0800 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -240,16 +241,12 @@ asmlinkage int sys_uname(struct old_utsn asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char * buf, size_t count, long dummy, loff_t pos) { - extern asmlinkage ssize_t sys_pread64(unsigned int fd, char * buf, - size_t count, loff_t pos); return sys_pread64(fd, buf, count, pos); } asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char * buf, size_t count, long dummy, loff_t pos) { - extern asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char * buf, - size_t count, loff_t pos); return sys_pwrite64(fd, buf, count, pos); } --- linux-2.6.3/arch/sparc64/Kconfig 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/sparc64/Kconfig 2004-02-17 22:26:05.000000000 -0800 @@ -718,12 +718,19 @@ config DEBUG_BOOTMEM depends on DEBUG_KERNEL bool "Debug BOOTMEM initialization" +config LOCKMETER + bool "Kernel lock metering" + depends on SMP && !PREEMPT + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + # We have a custom atomic_dec_and_lock() implementation but it's not # compatible with spinlock debugging so we need to fall back on # the generic version in that case. config HAVE_DEC_LOCK bool - depends on SMP && !DEBUG_SPINLOCK + depends on SMP && !DEBUG_SPINLOCK && !LOCKMETER default y config MCOUNT --- linux-2.6.3/arch/sparc64/kernel/ioctl32.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/sparc64/kernel/ioctl32.c 2004-02-17 22:25:28.000000000 -0800 @@ -12,6 +12,7 @@ #define INCLUDES #include "compat_ioctl.c" #include +#include #include #include #include --- linux-2.6.3/arch/sparc64/kernel/irq.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/sparc64/kernel/irq.c 2004-02-17 22:25:41.000000000 -0800 @@ -1211,7 +1211,7 @@ static int irq_affinity_read_proc (char if (cpus_empty(mask)) mask = cpu_online_map; - len = cpumask_snprintf(page, count, mask); + len = cpumask_scnprintf(page, count, mask); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3/arch/sparc64/kernel/setup.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/sparc64/kernel/setup.c 2004-02-17 22:25:30.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -71,7 +72,6 @@ struct screen_info screen_info = { void (*prom_palette)(int); void (*prom_keyboard)(void); -asmlinkage void sys_sync(void); /* it's really int */ static void prom_console_write(struct console *con, const char *s, unsigned n) @@ -603,7 +603,7 @@ static int __init set_preferred_console( } console_initcall(set_preferred_console); -asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on) +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on) { return -EIO; } --- linux-2.6.3/arch/sparc64/kernel/sparc64_ksyms.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/sparc64/kernel/sparc64_ksyms.c 2004-02-17 22:25:25.000000000 -0800 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -84,21 +85,13 @@ extern void syscall_trace(void); extern u32 sunos_sys_table[], sys_call_table32[]; extern void tl0_solaris(void); extern void sys_sigsuspend(void); -extern int sys_getppid(void); -extern int sys_getpid(void); -extern int sys_geteuid(void); -extern int sys_getuid(void); -extern int sys_getegid(void); -extern int sys_getgid(void); extern int svr4_getcontext(svr4_ucontext_t *uc, struct pt_regs *regs); extern int svr4_setcontext(svr4_ucontext_t *uc, struct pt_regs *regs); -extern int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); extern int compat_sys_ioctl(unsigned int fd, unsigned int cmd, u32 arg); extern int (*handle_mathemu)(struct pt_regs *, struct fpustate *); extern long sparc32_open(const char * filename, int flags, int mode); extern int io_remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long offset, unsigned long size, pgprot_t prot, int space); -extern long sys_close(unsigned int); - + extern int __ashrdi3(int, int); extern void dump_thread(struct pt_regs *, struct user *); --- linux-2.6.3/arch/sparc64/kernel/sunos_ioctl32.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/sparc64/kernel/sunos_ioctl32.c 2004-02-17 22:25:22.000000000 -0800 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -90,8 +91,6 @@ struct ifconf32 { compat_caddr_t ifcbuf; }; -extern asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); - extern asmlinkage int compat_sys_ioctl(unsigned int, unsigned int, u32); extern asmlinkage int sys_setsid(void); --- linux-2.6.3/arch/sparc64/kernel/sys_sparc32.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/sparc64/kernel/sys_sparc32.c 2004-02-17 22:25:30.000000000 -0800 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -88,17 +89,6 @@ __ret; \ }) -extern asmlinkage long sys_chown(const char *, uid_t,gid_t); -extern asmlinkage long sys_lchown(const char *, uid_t,gid_t); -extern asmlinkage long sys_fchown(unsigned int, uid_t,gid_t); -extern asmlinkage long sys_setregid(gid_t, gid_t); -extern asmlinkage long sys_setgid(gid_t); -extern asmlinkage long sys_setreuid(uid_t, uid_t); -extern asmlinkage long sys_setuid(uid_t); -extern asmlinkage long sys_setresuid(uid_t, uid_t, uid_t); -extern asmlinkage long sys_setresgid(gid_t, gid_t, gid_t); -extern asmlinkage long sys_setfsuid(uid_t); -extern asmlinkage long sys_setfsgid(gid_t); asmlinkage long sys32_chown16(const char * filename, u16 user, u16 group) { @@ -179,40 +169,81 @@ asmlinkage long sys32_setfsgid16(u16 gid return sys_setfsgid((gid_t)gid); } +static int groups16_to_user(u16 *grouplist, struct group_info *group_info) +{ + int i; + u16 group; + + for (i = 0; i < group_info->ngroups; i++) { + group = (u16)GROUP_AT(group_info, i); + if (put_user(group, grouplist+i)) + return -EFAULT; + } + + return 0; +} + +static int groups16_from_user(struct group_info *group_info, u16 *grouplist) +{ + int i; + u16 group; + + for (i = 0; i < group_info->ngroups; i++) { + if (get_user(group, grouplist+i)) + return -EFAULT; + GROUP_AT(group_info, i) = (gid_t)group; + } + + return 0; +} + asmlinkage long sys32_getgroups16(int gidsetsize, u16 *grouplist) { - u16 groups[NGROUPS]; - int i,j; + int i; if (gidsetsize < 0) return -EINVAL; - i = current->ngroups; + + get_group_info(current->group_info); + i = current->group_info->ngroups; if (gidsetsize) { - if (i > gidsetsize) - return -EINVAL; - for(j=0;jgroups[j]; - if (copy_to_user(grouplist, groups, sizeof(u16)*i)) - return -EFAULT; + if (i > gidsetsize) { + i = -EINVAL; + goto out; + } + if (groups16_to_user(grouplist, current->group_info)) { + i = -EFAULT; + goto out; + } } +out: + put_group_info(current->group_info); return i; } asmlinkage long sys32_setgroups16(int gidsetsize, u16 *grouplist) { - u16 groups[NGROUPS]; - int i; + struct group_info *group_info; + int retval; if (!capable(CAP_SETGID)) return -EPERM; - if ((unsigned) gidsetsize > NGROUPS) + if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; - if (copy_from_user(groups, grouplist, gidsetsize * sizeof(u16))) - return -EFAULT; - for (i = 0 ; i < gidsetsize ; i++) - current->groups[i] = (gid_t)groups[i]; - current->ngroups = gidsetsize; - return 0; + + group_info = groups_alloc(gidsetsize); + if (!group_info) + return -ENOMEM; + retval = groups16_from_user(group_info, grouplist); + if (retval) { + put_group_info(group_info); + return retval; + } + + retval = set_current_groups(group_info); + put_group_info(group_info); + + return retval; } asmlinkage long sys32_getuid16(void) @@ -251,9 +282,7 @@ static inline long put_tv32(struct compa __put_user(i->tv_usec, &o->tv_usec))); } -extern asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on); - -asmlinkage int sys32_ioperm(u32 from, u32 num, int on) +asmlinkage long sys32_ioperm(u32 from, u32 num, int on) { return sys_ioperm((unsigned long)from, (unsigned long)num, on); } @@ -795,9 +824,6 @@ out: return err; } -extern asmlinkage long sys_truncate(const char * path, unsigned long length); -extern asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); - asmlinkage int sys32_truncate64(const char * path, unsigned long high, unsigned long low) { if ((int)high < 0) @@ -1303,8 +1329,6 @@ int cp_compat_stat(struct kstat *stat, s return err; } -extern asmlinkage int sys_sysfs(int option, unsigned long arg1, unsigned long arg2); - asmlinkage int sys32_sysfs(int option, u32 arg1, u32 arg2) { return sys_sysfs(option, arg1, arg2); @@ -1530,8 +1554,6 @@ struct sysinfo32 { char _f[20-2*sizeof(int)-sizeof(int)]; }; -extern asmlinkage int sys_sysinfo(struct sysinfo *info); - asmlinkage int sys32_sysinfo(struct sysinfo32 *info) { struct sysinfo s; @@ -1579,8 +1601,6 @@ asmlinkage int sys32_sysinfo(struct sysi return ret; } -extern asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval); - asmlinkage int sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec *interval) { struct timespec t; @@ -1595,8 +1615,6 @@ asmlinkage int sys32_sched_rr_get_interv return ret; } -extern asmlinkage int sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, size_t sigsetsize); - asmlinkage int sys32_rt_sigprocmask(int how, compat_sigset_t *set, compat_sigset_t *oset, compat_size_t sigsetsize) { sigset_t s; @@ -1631,8 +1649,6 @@ asmlinkage int sys32_rt_sigprocmask(int return 0; } -extern asmlinkage int sys_rt_sigpending(sigset_t *set, size_t sigsetsize); - asmlinkage int sys32_rt_sigpending(compat_sigset_t *set, compat_size_t sigsetsize) { sigset_t s; @@ -1740,9 +1756,6 @@ sys32_rt_sigtimedwait(compat_sigset_t *u return ret; } -extern asmlinkage int -sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo); - asmlinkage int sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo) { @@ -2073,15 +2086,11 @@ out: #ifdef CONFIG_MODULES -extern asmlinkage long sys_init_module(void *, unsigned long, const char *); - asmlinkage int sys32_init_module(void *umod, u32 len, const char *uargs) { return sys_init_module(umod, len, uargs); } -extern asmlinkage long sys_delete_module(const char *, unsigned int); - asmlinkage int sys32_delete_module(const char *name_user, unsigned int flags) { return sys_delete_module(name_user, flags); @@ -2323,7 +2332,6 @@ done: return err; } #else /* !NFSD */ -extern asmlinkage long sys_ni_syscall(void); int asmlinkage sys32_nfsservctl(int cmd, void *notused, void *notused2) { return sys_ni_syscall(); @@ -2416,17 +2424,6 @@ asmlinkage int sys32_pause(void) } /* PCI config space poking. */ -extern asmlinkage int sys_pciconfig_read(unsigned long bus, - unsigned long dfn, - unsigned long off, - unsigned long len, - unsigned char *buf); - -extern asmlinkage int sys_pciconfig_write(unsigned long bus, - unsigned long dfn, - unsigned long off, - unsigned long len, - unsigned char *buf); asmlinkage int sys32_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf) { @@ -2446,9 +2443,6 @@ asmlinkage int sys32_pciconfig_write(u32 (unsigned char *)AA(ubuf)); } -extern asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); - asmlinkage int sys32_prctl(int option, u32 arg2, u32 arg3, u32 arg4, u32 arg5) { return sys_prctl(option, @@ -2459,12 +2453,6 @@ asmlinkage int sys32_prctl(int option, u } -extern asmlinkage ssize_t sys_pread64(unsigned int fd, char * buf, - size_t count, loff_t pos); - -extern asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char * buf, - size_t count, loff_t pos); - asmlinkage compat_ssize_t sys32_pread64(unsigned int fd, char *ubuf, compat_size_t count, u32 poshi, u32 poslo) { @@ -2477,8 +2465,6 @@ asmlinkage compat_ssize_t sys32_pwrite64 return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -extern asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count); - asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count) { return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count); @@ -2495,8 +2481,6 @@ long sys32_fadvise64_64(int fd, u32 offh ((loff_t)AA(lenhi)<<32)|AA(lenlo), advice); } -extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t count); - asmlinkage int sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, s32 count) { mm_segment_t old_fs = get_fs(); @@ -2516,8 +2500,6 @@ asmlinkage int sys32_sendfile(int out_fd return ret; } -extern asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t *offset, size_t count); - asmlinkage int sys32_sendfile64(int out_fd, int in_fd, compat_loff_t *offset, s32 count) { mm_segment_t old_fs = get_fs(); @@ -2692,8 +2674,6 @@ out: return ret; } -extern asmlinkage long sys_setpriority(int which, int who, int niceval); - asmlinkage int sys_setpriority32(u32 which, u32 who, u32 niceval) { return sys_setpriority((int) which, @@ -2753,8 +2733,6 @@ asmlinkage long sys32_sysctl(struct __sy #endif } -extern long sys_lookup_dcookie(u64 cookie64, char *buf, size_t len); - long sys32_lookup_dcookie(u32 cookie_high, u32 cookie_low, char *buf, size_t len) { return sys_lookup_dcookie((u64)cookie_high << 32 | cookie_low, --- linux-2.6.3/arch/sparc64/kernel/sys_sparc.c 2003-07-13 21:44:34.000000000 -0700 +++ 25/arch/sparc64/kernel/sys_sparc.c 2004-02-17 22:25:26.000000000 -0800 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -161,8 +162,6 @@ unsigned long get_fb_unmapped_area(struc return addr; } -extern asmlinkage unsigned long sys_brk(unsigned long brk); - asmlinkage unsigned long sparc_brk(unsigned long brk) { /* People could try to be nasty and use ta 0x6d in 32bit programs */ @@ -280,8 +279,6 @@ out: return err; } -extern asmlinkage int sys_newuname(struct new_utsname __user *name); - asmlinkage int sparc64_newuname(struct new_utsname __user *name) { int ret = sys_newuname(name); @@ -292,8 +289,6 @@ asmlinkage int sparc64_newuname(struct n return ret; } -extern asmlinkage long sys_personality(unsigned long); - asmlinkage int sparc64_personality(unsigned long personality) { int ret; --- linux-2.6.3/arch/sparc64/kernel/sys_sunos32.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/sparc64/kernel/sys_sunos32.c 2004-02-17 22:25:25.000000000 -0800 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -584,11 +585,6 @@ struct sunos_nfs_mount_args { char *netname; /* server's netname */ }; -extern asmlinkage int sys_mount(char *, char *, char *, unsigned long, void *); -extern asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr, int addrlen); -extern asmlinkage int sys_socket(int family, int type, int protocol); -extern asmlinkage int sys_bind(int fd, struct sockaddr *umyaddr, int addrlen); - /* Bind the socket on a local reserved port and connect it to the * remote server. This on Linux/i386 is done by the mount program, @@ -781,8 +777,6 @@ out: return ret; } -extern asmlinkage int sys_setsid(void); -extern asmlinkage int sys_setpgid(pid_t, pid_t); asmlinkage int sunos_setpgrp(pid_t pid, pid_t pgid) { @@ -1200,11 +1194,6 @@ static inline int check_nonblock(int ret return ret; } -extern asmlinkage ssize_t sys_read(unsigned int fd, char *buf, unsigned long count); -extern asmlinkage ssize_t sys_write(unsigned int fd, char *buf, unsigned long count); -extern asmlinkage int sys_recv(int fd, void *ubuf, size_t size, unsigned flags); -extern asmlinkage int sys_send(int fd, void *buff, size_t len, unsigned flags); -extern asmlinkage int sys_accept(int fd, struct sockaddr *sa, int *addrlen); extern asmlinkage int sys32_readv(u32 fd, u32 vector, s32 count); extern asmlinkage int sys32_writev(u32 fd, u32 vector, s32 count); @@ -1302,9 +1291,6 @@ asmlinkage int sunos_sigaction (int sig, return ret; } -extern asmlinkage int sys_setsockopt(int fd, int level, int optname, - char *optval, int optlen); - asmlinkage int sunos_setsockopt(int fd, int level, int optname, u32 optval, int optlen) { --- linux-2.6.3/arch/sparc64/kernel/time.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/sparc64/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -1126,6 +1126,7 @@ int do_settimeofday(struct timespec *tv) time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; write_sequnlock_irq(&xtime_lock); + clock_was_set(); return 0; } --- linux-2.6.3/arch/sparc64/lib/rwlock.S 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/sparc64/lib/rwlock.S 2004-02-17 22:26:05.000000000 -0800 @@ -85,5 +85,20 @@ __write_trylock_succeed: __write_trylock_fail: retl mov 0, %o0 + + .globl __read_trylock +__read_trylock: /* %o0 = lock_ptr */ + ldsw [%o0], %g5 + brlz,pn %g5, 100f + add %g5, 1, %g7 + cas [%o0], %g5, %g7 + cmp %g5, %g7 + bne,pn %icc, __read_trylock + membar #StoreLoad | #StoreStore + retl + mov 1, %o0 +100: retl + mov 0, %o0 + rwlock_impl_end: --- linux-2.6.3/arch/sparc64/prom/printf.c 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/sparc64/prom/printf.c 2004-02-17 22:25:19.000000000 -0800 @@ -40,7 +40,7 @@ prom_printf(char *fmt, ...) int i; va_start(args, fmt); - i = vsnprintf(ppbuf, sizeof(ppbuf), fmt, args); + i = vscnprintf(ppbuf, sizeof(ppbuf), fmt, args); va_end(args); prom_write(ppbuf, i); --- linux-2.6.3/arch/sparc64/solaris/ioctl.c 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/sparc64/solaris/ioctl.c 2004-02-17 22:25:22.000000000 -0800 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -34,8 +35,6 @@ #include "conv.h" #include "socksys.h" -extern asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg); extern asmlinkage int compat_sys_ioctl(unsigned int fd, unsigned int cmd, u32 arg); asmlinkage int solaris_ioctl(unsigned int fd, unsigned int cmd, u32 arg); --- linux-2.6.3/arch/sparc64/solaris/socksys.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/sparc64/solaris/socksys.c 2004-02-17 22:25:29.000000000 -0800 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -35,9 +36,6 @@ #include "conv.h" #include "socksys.h" -extern asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg); - static int af_inet_protocols[] = { IPPROTO_ICMP, IPPROTO_ICMP, IPPROTO_IGMP, IPPROTO_IPIP, IPPROTO_TCP, IPPROTO_EGP, IPPROTO_PUP, IPPROTO_UDP, IPPROTO_IDP, IPPROTO_RAW, --- linux-2.6.3/arch/sparc64/solaris/timod.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/sparc64/solaris/timod.c 2004-02-17 22:25:22.000000000 -0800 @@ -27,8 +27,6 @@ #include "conv.h" #include "socksys.h" -extern asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg); asmlinkage int solaris_ioctl(unsigned int fd, unsigned int cmd, u32 arg); static spinlock_t timod_pagelock = SPIN_LOCK_UNLOCKED; --- linux-2.6.3/arch/sparc/kernel/setup.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/sparc/kernel/setup.c 2004-02-17 22:25:25.000000000 -0800 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -66,7 +67,6 @@ struct screen_info screen_info = { extern unsigned long trapbase; void (*prom_palette)(int); -asmlinkage void sys_sync(void); /* it's really int */ /* Pretty sick eh? */ void prom_sync_me(void) --- linux-2.6.3/arch/sparc/kernel/sunos_ioctl.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/sparc/kernel/sunos_ioctl.c 2004-02-17 22:25:22.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,6 @@ extern char sunkbd_layout; /* NR_OPEN is now larger and dynamic in recent kernels. */ #define SUNOS_NR_OPEN 256 -extern asmlinkage int sys_ioctl(unsigned int, unsigned int, unsigned long); extern asmlinkage int sys_setsid(void); asmlinkage int sunos_ioctl (int fd, unsigned long cmd, unsigned long arg) --- linux-2.6.3/arch/sparc/kernel/sys_sparc.c 2003-07-13 21:44:34.000000000 -0700 +++ 25/arch/sparc/kernel/sys_sparc.c 2004-02-17 22:25:26.000000000 -0800 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -78,8 +79,6 @@ unsigned long arch_get_unmapped_area(str } } -extern asmlinkage unsigned long sys_brk(unsigned long brk); - asmlinkage unsigned long sparc_brk(unsigned long brk) { if(ARCH_SUN4C_SUN4) { --- linux-2.6.3/arch/sparc/kernel/sys_sunos.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/sparc/kernel/sys_sunos.c 2004-02-17 22:25:25.000000000 -0800 @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -564,8 +565,6 @@ asmlinkage int sunos_pathconf(char *path } /* SunOS mount system call emulation */ -extern asmlinkage int -sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp); asmlinkage int sunos_select(int width, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) { @@ -621,11 +620,6 @@ struct sunos_nfs_mount_args { }; -extern asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr, int addrlen); -extern asmlinkage int sys_socket(int family, int type, int protocol); -extern asmlinkage int sys_bind(int fd, struct sockaddr *umyaddr, int addrlen); - - /* Bind the socket on a local reserved port and connect it to the * remote server. This on Linux/i386 is done by the mount program, * not by the kernel. @@ -814,8 +808,6 @@ out: return ret; } -extern asmlinkage int sys_setsid(void); -extern asmlinkage int sys_setpgid(pid_t, pid_t); asmlinkage int sunos_setpgrp(pid_t pid, pid_t pgid) { @@ -1050,15 +1042,6 @@ static inline int check_nonblock(int ret return ret; } -extern asmlinkage ssize_t sys_read(unsigned int fd,char *buf,int count); -extern asmlinkage ssize_t sys_write(unsigned int fd,char *buf,int count); -extern asmlinkage int sys_recv(int fd, void * ubuf, int size, unsigned flags); -extern asmlinkage int sys_send(int fd, void * buff, int len, unsigned flags); -extern asmlinkage int sys_accept(int fd, struct sockaddr *sa, int *addrlen); -extern asmlinkage int sys_readv(unsigned long fd, const struct iovec * vector, long count); -extern asmlinkage int sys_writev(unsigned long fd, const struct iovec * vector, long count); - - asmlinkage int sunos_read(unsigned int fd,char *buf,int count) { int ret; @@ -1164,9 +1147,6 @@ sunos_sigaction(int sig, const struct ol } -extern asmlinkage int sys_setsockopt(int fd, int level, int optname, char *optval, int optlen); -extern asmlinkage int sys_getsockopt(int fd, int level, int optname, char *optval, int *optlen); - asmlinkage int sunos_setsockopt(int fd, int level, int optname, char *optval, int optlen) { --- linux-2.6.3/arch/sparc/kernel/time.c 2003-11-26 13:53:35.000000000 -0800 +++ 25/arch/sparc/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -535,6 +535,7 @@ int do_settimeofday(struct timespec *tv) write_seqlock_irq(&xtime_lock); ret = bus_do_settimeofday(tv); write_sequnlock_irq(&xtime_lock); + clock_was_set(); return ret; } --- linux-2.6.3/arch/sparc/prom/printf.c 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/sparc/prom/printf.c 2004-02-17 22:25:19.000000000 -0800 @@ -39,7 +39,7 @@ prom_printf(char *fmt, ...) int i; va_start(args, fmt); - i = vsnprintf(ppbuf, sizeof(ppbuf), fmt, args); + i = vscnprintf(ppbuf, sizeof(ppbuf), fmt, args); va_end(args); prom_write(ppbuf, i); --- linux-2.6.3/arch/um/drivers/net_kern.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/um/drivers/net_kern.c 2004-02-17 22:06:13.000000000 -0800 @@ -358,8 +358,12 @@ static int eth_configure(int n, void *in rtnl_lock(); err = register_netdevice(dev); rtnl_unlock(); - if (err) + if (err) { + device->dev = NULL; + /* XXX: should we call ->remove() here? */ + free_netdev(dev); return 1; + } lp = dev->priv; INIT_LIST_HEAD(&lp->list); --- linux-2.6.3/arch/um/include/kern_util.h 2003-06-14 12:18:07.000000000 -0700 +++ 25/arch/um/include/kern_util.h 2004-02-17 22:25:26.000000000 -0800 @@ -45,7 +45,6 @@ extern int page_mask(void); extern int need_finish_fork(void); extern void free_stack(unsigned long stack, int order); extern void add_input_request(int op, void (*proc)(int), void *arg); -extern int sys_execve(char *file, char **argv, char **env); extern char *current_cmd(void); extern void timer_handler(int sig, union uml_pt_regs *regs); extern int set_signals(int enable); --- linux-2.6.3/arch/um/kernel/irq.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/um/kernel/irq.c 2004-02-17 22:25:41.000000000 -0800 @@ -575,7 +575,7 @@ static cpumask_t irq_affinity [NR_IRQS] static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -613,7 +613,7 @@ static int irq_affinity_write_proc (stru static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3/arch/um/kernel/syscall_kern.c 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/kernel/syscall_kern.c 2004-02-17 22:25:28.000000000 -0800 @@ -11,6 +11,7 @@ #include "linux/msg.h" #include "linux/shm.h" #include "linux/sys.h" +#include "linux/syscalls.h" #include "linux/unistd.h" #include "linux/slab.h" #include "linux/utime.h" --- linux-2.6.3/arch/um/kernel/sys_call_table.c 2003-06-14 12:18:30.000000000 -0700 +++ 25/arch/um/kernel/sys_call_table.c 2004-02-17 22:25:27.000000000 -0800 @@ -8,6 +8,7 @@ #include "linux/version.h" #include "linux/sys.h" #include "linux/swap.h" +#include "linux/syscalls.h" #include "linux/sysctl.h" #include "asm/signal.h" #include "sysdep/syscalls.h" @@ -268,9 +269,9 @@ syscall_handler_t *sys_call_table[] = { [ __NR_creat ] = sys_creat, [ __NR_link ] = sys_link, [ __NR_unlink ] = sys_unlink, + [ __NR_execve ] = (syscall_handler_t *) sys_execve, /* declared differently in kern_util.h */ - [ __NR_execve ] = (syscall_handler_t *) sys_execve, [ __NR_chdir ] = sys_chdir, [ __NR_time ] = um_time, [ __NR_mknod ] = sys_mknod, --- linux-2.6.3/arch/um/kernel/time.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -93,6 +93,7 @@ void do_gettimeofday(struct timeval *tv) gettimeofday(tv, NULL); timeradd(tv, &local_offset, tv); time_unlock(flags); + clock_was_set(); } EXPORT_SYMBOL(do_gettimeofday); --- linux-2.6.3/arch/v850/kernel/syscalls.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/v850/kernel/syscalls.c 2004-02-17 22:25:28.000000000 -0800 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include --- linux-2.6.3/arch/v850/kernel/time.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/v850/kernel/time.c 2004-02-17 22:25:52.000000000 -0800 @@ -193,6 +193,7 @@ int do_settimeofday(struct timespec *tv) time_esterror = NTP_PHASE_LIMIT; write_sequnlock_irq (&xtime_lock); + clock_was_set(); return 0; } --- linux-2.6.3/arch/x86_64/ia32/ia32_ioctl.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/ia32/ia32_ioctl.c 2004-02-17 22:25:22.000000000 -0800 @@ -10,12 +10,11 @@ */ #define INCLUDES +#include #include "compat_ioctl.c" #include #include -extern asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); - #define CODE #include "compat_ioctl.c" --- linux-2.6.3/arch/x86_64/ia32/ipc32.c 2003-08-08 22:55:11.000000000 -0700 +++ 25/arch/x86_64/ia32/ipc32.c 2004-02-17 22:25:28.000000000 -0800 @@ -1,5 +1,6 @@ #include #include +#include #include #include #include --- linux-2.6.3/arch/x86_64/ia32/ptrace32.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/x86_64/ia32/ptrace32.c 2004-02-17 22:25:28.000000000 -0800 @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include #include @@ -223,8 +225,6 @@ static struct task_struct *find_target(i } -extern asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, unsigned long data); - asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) { struct task_struct *child; --- linux-2.6.3/arch/x86_64/ia32/sys_ia32.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/x86_64/ia32/sys_ia32.c 2004-02-17 22:25:30.000000000 -0800 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -110,9 +111,6 @@ int cp_compat_stat(struct kstat *kbuf, s return 0; } -extern long sys_truncate(char *, loff_t); -extern long sys_ftruncate(int, loff_t); - asmlinkage long sys32_truncate64(char * filename, unsigned long offset_low, unsigned long offset_high) { @@ -236,8 +234,6 @@ sys32_mmap(struct mmap_arg_struct *arg) return retval; } -extern asmlinkage long sys_mprotect(unsigned long start,size_t len,unsigned long prot); - asmlinkage long sys32_mprotect(unsigned long start, size_t len, unsigned long prot) { @@ -363,12 +359,9 @@ sys32_sigaction (int sig, struct old_sig return ret; } -extern asmlinkage long sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, - size_t sigsetsize); - asmlinkage long -sys32_rt_sigprocmask(int how, compat_sigset_t *set, compat_sigset_t *oset, - unsigned int sigsetsize) +sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, unsigned int sigsetsize) { sigset_t s; compat_sigset_t s32; @@ -734,9 +727,6 @@ sys32_old_select(struct sel_arg_struct * (struct compat_timeval *)A(a.tvp)); } -asmlinkage ssize_t sys_readv(unsigned long,const struct iovec *,unsigned long); -asmlinkage ssize_t sys_writev(unsigned long,const struct iovec *,unsigned long); - static struct iovec * get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 *count, int type, int *errp) { @@ -878,18 +868,12 @@ int sys32_ni_syscall(int call) /* 32-bit timeval and related flotsam. */ -extern asmlinkage long sys_sysfs(int option, unsigned long arg1, - unsigned long arg2); - asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2) { return sys_sysfs(option, arg1, arg2); } -extern asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, - unsigned long new_flags, void *data); - static char *badfs[] = { "smbfs", "ncpfs", NULL }; @@ -940,8 +924,6 @@ struct sysinfo32 { char _f[20-2*sizeof(u32)-sizeof(int)]; }; -extern asmlinkage long sys_sysinfo(struct sysinfo *info); - asmlinkage long sys32_sysinfo(struct sysinfo32 *info) { @@ -991,9 +973,6 @@ sys32_sysinfo(struct sysinfo32 *info) return 0; } -extern asmlinkage long sys_sched_rr_get_interval(pid_t pid, - struct timespec *interval); - asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec *interval) { @@ -1009,10 +988,8 @@ sys32_sched_rr_get_interval(compat_pid_t return ret; } -extern asmlinkage long sys_rt_sigpending(sigset_t *set, size_t sigsetsize); - asmlinkage long -sys32_rt_sigpending(compat_sigset_t *set, compat_size_t sigsetsize) +sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize) { sigset_t s; compat_sigset_t s32; @@ -1035,9 +1012,6 @@ sys32_rt_sigpending(compat_sigset_t *set return ret; } -extern asmlinkage long -sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, - const struct timespec *uts, size_t sigsetsize); asmlinkage long sys32_rt_sigtimedwait(compat_sigset_t *uthese, siginfo_t32 *uinfo, @@ -1077,9 +1051,6 @@ sys32_rt_sigtimedwait(compat_sigset_t *u return ret; } -extern asmlinkage long -sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo); - asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo) { @@ -1165,12 +1136,6 @@ sys32_sysctl(struct sysctl_ia32 *args32) #endif } -extern asmlinkage ssize_t sys_pread64(unsigned int fd, char * buf, - size_t count, loff_t pos); - -extern asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char * buf, - size_t count, loff_t pos); - /* warning: next two assume little endian */ asmlinkage long sys32_pread(unsigned int fd, char *ubuf, u32 count, u32 poslo, u32 poshi) @@ -1187,8 +1152,6 @@ sys32_pwrite(unsigned int fd, char *ubuf } -extern asmlinkage long sys_personality(unsigned long); - asmlinkage long sys32_personality(unsigned long personality) { @@ -1202,9 +1165,6 @@ sys32_personality(unsigned long personal return ret; } -extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, - size_t count); - asmlinkage long sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, s32 count) { @@ -1375,9 +1335,7 @@ long sys32_uname(struct old_utsname * na return err?-EFAULT:0; } -extern int sys_ustat(dev_t, struct ustat *); - -long sys32_ustat(unsigned dev, struct ustat32 *u32p) +long sys32_ustat(unsigned dev, struct ustat32 __user *u32p) { struct ustat u; mm_segment_t seg; @@ -1500,15 +1458,11 @@ asmlinkage long sys32_clone(unsigned int * Some system calls that need sign extended arguments. This could be done by a generic wrapper. */ -extern off_t sys_lseek (unsigned int fd, off_t offset, unsigned int origin); - long sys32_lseek (unsigned int fd, int offset, unsigned int whence) { return sys_lseek(fd, offset, whence); } -extern int sys_kill(pid_t pid, int sig); - long sys32_kill(int pid, int sig) { return sys_kill(pid, sig); @@ -1736,15 +1690,12 @@ done: return err; } #else /* !NFSD */ -extern asmlinkage long sys_ni_syscall(void); long asmlinkage sys32_nfsservctl(int cmd, void *notused, void *notused2) { return sys_ni_syscall(); } #endif -extern long sys_io_setup(unsigned nr_reqs, aio_context_t *ctx); - long sys32_io_setup(unsigned nr_reqs, u32 *ctx32p) { long ret; @@ -1802,11 +1753,6 @@ asmlinkage long sys32_io_submit(aio_cont return i ? i : ret; } -extern asmlinkage long sys_io_getevents(aio_context_t ctx_id, - long min_nr, - long nr, - struct io_event *events, - struct timespec *timeout); asmlinkage long sys32_io_getevents(aio_context_t ctx_id, unsigned long min_nr, @@ -1895,8 +1841,6 @@ sys32_timer_create(u32 clock, struct sig return err; } -extern long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice); - long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, __u32 len_low, __u32 len_high, int advice) { --- linux-2.6.3/arch/x86_64/Kconfig 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/x86_64/Kconfig 2004-02-17 22:08:46.000000000 -0800 @@ -48,12 +48,14 @@ config EARLY_PRINTK bool default y help - Write kernel log output directly into the VGA buffer. This is useful - for kernel debugging when your machine crashes very early before - the console code is initialized. For normal operation it is not - recommended because it looks ugly and doesn't cooperate with - klogd/syslogd or the X server. You should normally N here, unless - you want to debug such a crash. + Write kernel log output directly into the VGA buffer or to a serial + port. + + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. For normal operation + it is not recommended because it looks ugly and doesn't cooperate + with klogd/syslogd or the X server. You should normally N here, + unless you want to debug such a crash. config HPET_TIMER bool @@ -433,6 +435,7 @@ config INIT_DEBUG config DEBUG_INFO bool "Compile the kernel with debug info" depends on DEBUG_KERNEL + default n help If you say Y here the resulting kernel image will include debugging info resulting in a larger kernel image. @@ -464,9 +467,8 @@ config IOMMU_LEAK help Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. - -#config X86_REMOTE_DEBUG -# bool "kgdb debugging stub" + +source "arch/x86_64/Kconfig.kgdb" endmenu --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/x86_64/Kconfig.kgdb 2004-02-17 22:08:45.000000000 -0800 @@ -0,0 +1,176 @@ +config KGDB + bool "Include kgdb kernel debugger" + depends on DEBUG_KERNEL + select DEBUG_INFO + help + If you say Y here, the system will be compiled with the debug + option (-g) and a debugging stub will be included in the + kernel. This stub communicates with gdb on another (host) + computer via a serial port. The host computer should have + access to the kernel binary file (vmlinux) and a serial port + that is connected to the target machine. Gdb can be made to + configure the serial port or you can use stty and setserial to + do this. See the 'target' command in gdb. This option also + configures in the ability to request a breakpoint early in the + boot process. To request the breakpoint just include 'kgdb' + as a boot option when booting the target machine. The system + will then break as soon as it looks at the boot options. This + option also installs a breakpoint in panic and sends any + kernel faults to the debugger. For more information see the + Documentation/i386/kgdb.txt file. + +choice + depends on KGDB + prompt "Debug serial port BAUD" + default KGDB_115200BAUD + help + Gdb and the kernel stub need to agree on the baud rate to be + used. Some systems (x86 family at this writing) allow this to + be configured. + +config KGDB_9600BAUD + bool "9600" + +config KGDB_19200BAUD + bool "19200" + +config KGDB_38400BAUD + bool "38400" + +config KGDB_57600BAUD + bool "57600" + +config KGDB_115200BAUD + bool "115200" +endchoice + +config KGDB_PORT + hex "hex I/O port address of the debug serial port" + depends on KGDB + default 3f8 + help + Some systems (x86 family at this writing) allow the port + address to be configured. The number entered is assumed to be + hex, don't put 0x in front of it. The standard address are: + COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx + will tell you what you have. It is good to test the serial + connection with a live system before trying to debug. + +config KGDB_IRQ + int "IRQ of the debug serial port" + depends on KGDB + default 4 + help + This is the irq for the debug port. If everything is working + correctly and the kernel has interrupts on a control C to the + port should cause a break into the kernel debug stub. + +config DEBUG_INFO + bool + depends on KGDB + default y + +config KGDB_MORE + bool "Add any additional compile options" + depends on KGDB + default n + help + Saying yes here turns on the ability to enter additional + compile options. + + +config KGDB_OPTIONS + depends on KGDB_MORE + string "Additional compile arguments" + default "-O1" + help + This option allows you enter additional compile options for + the whole kernel compile. Each platform will have a default + that seems right for it. For example on PPC "-ggdb -O1", and + for i386 "-O1". Note that by configuring KGDB "-g" is already + turned on. In addition, on i386 platforms + "-fomit-frame-pointer" is deleted from the standard compile + options. + +config NO_KGDB_CPUS + int "Number of CPUs" + depends on KGDB && SMP + default NR_CPUS + help + + This option sets the number of cpus for kgdb ONLY. It is used + to prune some internal structures so they look "nice" when + displayed with gdb. This is to overcome possibly larger + numbers that may have been entered above. Enter the real + number to get nice clean kgdb_info displays. + +config KGDB_TS + bool "Enable kgdb time stamp macros?" + depends on KGDB + default n + help + Kgdb event macros allow you to instrument your code with calls + to the kgdb event recording function. The event log may be + examined with gdb at a break point. Turning on this + capability also allows you to choose how many events to + keep. Kgdb always keeps the lastest events. + +choice + depends on KGDB_TS + prompt "Max number of time stamps to save?" + default KGDB_TS_128 + +config KGDB_TS_64 + bool "64" + +config KGDB_TS_128 + bool "128" + +config KGDB_TS_256 + bool "256" + +config KGDB_TS_512 + bool "512" + +config KGDB_TS_1024 + bool "1024" + +endchoice + +config STACK_OVERFLOW_TEST + bool "Turn on kernel stack overflow testing?" + depends on KGDB + default n + help + This option enables code in the front line interrupt handlers + to check for kernel stack overflow on interrupts and system + calls. This is part of the kgdb code on x86 systems. + +config KGDB_CONSOLE + bool "Enable serial console thru kgdb port" + depends on KGDB + default n + help + This option enables the command line "console=kgdb" option. + When the system is booted with this option in the command line + all kernel printk output is sent to gdb (as well as to other + consoles). For this to work gdb must be connected. For this + reason, this command line option will generate a breakpoint if + gdb has not yet connected. After the gdb continue command is + given all pent up console output will be printed by gdb on the + host machine. Neither this option, nor KGDB require the + serial driver to be configured. + +config KGDB_SYSRQ + bool "Turn on SysRq 'G' command to do a break?" + depends on KGDB + default y + help + This option includes an option in the SysRq code that allows + you to enter SysRq G which generates a breakpoint to the KGDB + stub. This will work if the keyboard is alive and can + interrupt the system. Because of constraints on when the + serial port interrupt can be enabled, this code may allow you + to interrupt the system before the serial port control C is + available. Just say yes here. + --- linux-2.6.3/arch/x86_64/kernel/early_printk.c 2003-06-14 12:18:26.000000000 -0700 +++ 25/arch/x86_64/kernel/early_printk.c 2004-02-17 22:25:19.000000000 -0800 @@ -7,7 +7,11 @@ /* Simple VGA output */ +#ifdef __i386__ +#define VGABASE __pa(__PAGE_OFFSET + 0xb8000UL) +#else #define VGABASE 0xffffffff800b8000UL +#endif #define MAX_YPOS 25 #define MAX_XPOS 80 @@ -22,15 +26,14 @@ static void early_vga_write(struct conso while ((c = *str++) != '\0' && n-- > 0) { if (current_ypos >= MAX_YPOS) { /* scroll 1 line up */ - for(k = 1, j = 0; k < MAX_YPOS; k++, j++) { - for(i = 0; i < MAX_XPOS; i++) { + for (k = 1, j = 0; k < MAX_YPOS; k++, j++) { + for (i = 0; i < MAX_XPOS; i++) { writew(readw(VGABASE + 2*(MAX_XPOS*k + i)), VGABASE + 2*(MAX_XPOS*j + i)); } } - for(i = 0; i < MAX_XPOS; i++) { + for (i = 0; i < MAX_XPOS; i++) writew(0x720, VGABASE + 2*(MAX_XPOS*j + i)); - } current_ypos = MAX_YPOS-1; } if (c == '\n') { @@ -38,7 +41,8 @@ static void early_vga_write(struct conso current_ypos++; } else if (c != '\r') { writew(((0x7 << 8) | (unsigned short) c), - VGABASE + 2*(MAX_XPOS*current_ypos + current_xpos++)); + VGABASE + 2*(MAX_XPOS*current_ypos + + current_xpos++)); if (current_xpos >= MAX_XPOS) { current_xpos = 0; current_ypos++; @@ -78,7 +82,7 @@ static int early_serial_putc(unsigned ch { unsigned timeout = 0xffff; while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) - rep_nop(); + cpu_relax(); outb(ch, early_serial_base + TXR); return timeout ? 0 : -1; } @@ -93,10 +97,13 @@ static void early_serial_write(struct co } } +#define DEFAULT_BAUD 9600 + static __init void early_serial_init(char *opt) { unsigned char c; - unsigned divisor, baud = 38400; + unsigned divisor; + unsigned baud = DEFAULT_BAUD; char *s, *e; if (*opt == ',') @@ -109,25 +116,26 @@ static __init void early_serial_init(cha early_serial_base = simple_strtoul(s, &e, 16); } else { static int bases[] = { 0x3f8, 0x2f8 }; - if (!strncmp(s,"ttyS",4)) - s+=4; - port = simple_strtoul(s, &e, 10); - if (port > 1 || s == e) - port = 0; - early_serial_base = bases[port]; - } + + if (!strncmp(s,"ttyS",4)) + s += 4; + port = simple_strtoul(s, &e, 10); + if (port > 1 || s == e) + port = 0; + early_serial_base = bases[port]; + } } - outb(0x3, early_serial_base + LCR); /* 8n1 */ - outb(0, early_serial_base + IER); /* no interrupt */ - outb(0, early_serial_base + FCR); /* no fifo */ - outb(0x3, early_serial_base + MCR); /* DTR + RTS */ + outb(0x3, early_serial_base + LCR); /* 8n1 */ + outb(0, early_serial_base + IER); /* no interrupt */ + outb(0, early_serial_base + FCR); /* no fifo */ + outb(0x3, early_serial_base + MCR); /* DTR + RTS */ s = strsep(&opt, ","); if (s != NULL) { baud = simple_strtoul(s, &e, 0); if (baud == 0 || s == e) - baud = 38400; + baud = DEFAULT_BAUD; } divisor = 115200 / baud; @@ -154,8 +162,9 @@ void early_printk(const char *fmt, ...) char buf[512]; int n; va_list ap; + va_start(ap,fmt); - n = vsnprintf(buf,512,fmt,ap); + n = vscnprintf(buf,512,fmt,ap); early_console->write(early_console,buf,n); va_end(ap); } @@ -170,6 +179,8 @@ int __init setup_early_printk(char *opt) if (early_console_initialized) return -1; + opt = strchr(opt, '=') + 1; + strlcpy(buf,opt,sizeof(buf)); space = strchr(buf, ' '); if (space) @@ -200,19 +211,12 @@ void __init disable_early_printk(void) if (!early_console_initialized || !early_console) return; if (!keep_early) { - printk("disabling early console...\n"); + printk("disabling early console\n"); unregister_console(early_console); early_console_initialized = 0; } else { - printk("keeping early console.\n"); + printk("keeping early console\n"); } } -/* syntax: earlyprintk=vga - earlyprintk=serial[,ttySn[,baudrate]] - Append ,keep to not disable it when the real console takes over. - Only vga or serial at a time, not both. - Currently only ttyS0 and ttyS1 are supported. - Interaction with the standard serial driver is not very good. - The VGA output is eventually overwritten by the real console. */ -__setup("earlyprintk=", setup_early_printk); +__setup("earlyprintk=", setup_early_printk); --- linux-2.6.3/arch/x86_64/kernel/head64.c 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/x86_64/kernel/head64.c 2004-02-17 22:08:46.000000000 -0800 @@ -83,9 +83,9 @@ void __init x86_64_start_kernel(char * r /* default console: */ if (!strstr(saved_command_line, "console=")) strcat(saved_command_line, " console=tty0"); - s = strstr(saved_command_line, "earlyprintk="); + s = strstr(saved_command_line, "earlyprintk="); if (s != NULL) - setup_early_printk(s+12); + setup_early_printk(s); #ifdef CONFIG_DISCONTIGMEM s = strstr(saved_command_line, "numa="); if (s != NULL) --- linux-2.6.3/arch/x86_64/kernel/irq.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/irq.c 2004-02-17 22:25:41.000000000 -0800 @@ -405,6 +405,9 @@ out: spin_unlock(&desc->lock); irq_exit(); + + kgdb_process_breakpoint(); + return 1; } @@ -826,7 +829,7 @@ static cpumask_t irq_affinity [NR_IRQS] static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -864,7 +867,7 @@ static int irq_affinity_write_proc (stru static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/x86_64/kernel/kgdb_stub.c 2004-02-17 22:08:45.000000000 -0800 @@ -0,0 +1,2595 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (c) 2000 VERITAS Software Corporation. + * + */ +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * Updated by: David Grothe + * Updated by: Robert Walsh + * Updated by: wangdi + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Compatibility with 2.1.xx kernel by David Grothe + * + * Changes to allow auto initilization. All that is needed is that it + * be linked with the kernel and a break point (int 3) be executed. + * The header file defines BREAKPOINT to allow one to do + * this. It should also be possible, once the interrupt system is up, to + * call putDebugChar("+"). Once this is done, the remote debugger should + * get our attention by sending a ^C in a packet. George Anzinger + * + * Integrated into 2.2.5 kernel by Tigran Aivazian + * Added thread support, support for multiple processors, + * support for ia-32(x86) hardware debugging. + * Amit S. Kale ( akale@veritas.com ) + * + * Modified to support debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + * X86_64 changes from Andi Kleen's patch merged by Jim Houston + * (jim.houston@ccur.com). If it works thank Andi if its broken + * blame me. + * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing an int 3. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? SNN (signal NN) + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $#. + * + * where + * :: + * :: < two hex digits computed as modulo 256 sum of > + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer. + * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ +#define KGDB_VERSION "<20030915.1651.33>" +#include +#include +#include /* for strcpy */ +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define Dearly_printk(x...) +int kgdb_enabled = 0; + +/************************************************************************ + * + * external low-level support routines + */ +typedef void (*Function) (void); /* pointer to a function */ + +/* Thread reference */ +typedef unsigned char threadref[8]; + +extern int tty_putDebugChar(int); /* write a single character */ +extern int tty_getDebugChar(void); /* read and return a single char */ +extern void tty_flushDebugChar(void); /* flush pending characters */ +extern int eth_putDebugChar(int); /* write a single character */ +extern int eth_getDebugChar(void); /* read and return a single char */ +extern void eth_flushDebugChar(void); /* flush pending characters */ + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 400 + +char *kgdb_version = KGDB_VERSION; + +/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ +int debug_regs = 0; /* set to non-zero to print registers */ + +/* filled in by an external module */ +char *gdb_module_offsets; + +static const char hexchars[] = "0123456789abcdef"; + +/* Number of bytes of registers. */ +#define NUMREGBYTES (NUMREGS * sizeof(unsigned long)) +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + * + * Could add XMM and segment registers here. + */ +enum regnames {_RAX, + _RBX, + _RCX, + _RDX, + _RSI, + _RDI, + _RBP, + _RSP, + _R8, + _R9, + _R10, + _R11, + _R12, + _R13, + _R14, + _R15, + _PC, + _PS, + NUMREGS }; + + +/*************************** ASSEMBLY CODE MACROS *************************/ +/* + * Put the error code here just in case the user cares. + * Likewise, the vector number here (since GDB only gets the signal + * number through the usual means, and that's not very specific). + * The called_from is the return address so he can tell how we entered kgdb. + * This will allow him to seperate out the various possible entries. + */ +#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ + +#define PID_MAX PID_MAX_DEFAULT + +#ifdef CONFIG_SMP +void smp_send_nmi_allbutself(void); +#define IF_SMP(x) x +#undef MAX_NO_CPUS +#ifndef CONFIG_NO_KGDB_CPUS +#define CONFIG_NO_KGDB_CPUS 2 +#endif +#if CONFIG_NO_KGDB_CPUS > NR_CPUS +#define MAX_NO_CPUS NR_CPUS +#else +#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS +#endif +#define hold_init hold_on_sstep: 1, +#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) +#define NUM_CPUS num_online_cpus() +#else +#define IF_SMP(x) +#define hold_init +#undef MAX_NO_CPUS +#define MAX_NO_CPUS 1 +#define NUM_CPUS 1 +#endif +#define NOCPU (struct task_struct *)0xbad1fbad +/* *INDENT-OFF* */ +struct kgdb_info { + int used_malloc; + void *called_from; + long long entry_tsc; + int errcode; + int vector; + int print_debug_info; +#ifdef CONFIG_SMP + int hold_on_sstep; + struct { + volatile struct task_struct *task; + int pid; + int hold; + struct pt_regs *regs; + } cpus_waiting[MAX_NO_CPUS]; +#endif +} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; + +/* *INDENT-ON* */ + +#define used_m kgdb_info.used_malloc +/* + * This is little area we set aside to contain the stack we + * need to build to allow gdb to call functions. We use one + * per cpu to avoid locking issues. We will do all this work + * with interrupts off so that should take care of the protection + * issues. + */ +#define LOOKASIDE_SIZE 200 /* should be more than enough */ +#define MALLOC_MAX 200 /* Max malloc size */ +struct { + unsigned long rsp; + unsigned long array[LOOKASIDE_SIZE]; +} fn_call_lookaside[MAX_NO_CPUS]; + +static int trap_cpu; +static unsigned long OLD_esp; + +#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] +#define IF_BIT 0x200 +#define TF_BIT 0x100 + +#define MALLOC_ROUND 8-1 + +static char malloc_array[MALLOC_MAX]; +IF_SMP(static void to_gdb(const char *mess)); +void * +malloc(int size) +{ + + if (size <= (MALLOC_MAX - used_m)) { + int old_used = used_m; + used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); + return &malloc_array[old_used]; + } else { + return NULL; + } +} + +/* + * I/O dispatch functions... + * Based upon kgdboe, either call the ethernet + * handler or the serial one.. + */ +void +putDebugChar(int c) +{ + if (!kgdboe) { + tty_putDebugChar(c); + } else { + eth_putDebugChar(c); + } +} + +int +getDebugChar(void) +{ + if (!kgdboe) { + return tty_getDebugChar(); + } else { + return eth_getDebugChar(); + } +} + +void +flushDebugChar(void) +{ + if (!kgdboe) { + tty_flushDebugChar(); + } else { + eth_flushDebugChar(); + } +} + +/* + * Gdb calls functions by pushing agruments, including a return address + * on the stack and the adjusting EIP to point to the function. The + * whole assumption in GDB is that we are on a different stack than the + * one the "user" i.e. code that hit the break point, is on. This, of + * course is not true in the kernel. Thus various dodges are needed to + * do the call without directly messing with EIP (which we can not change + * as it is just a location and not a register. To adjust it would then + * require that we move every thing below EIP up or down as needed. This + * will not work as we may well have stack relative pointer on the stack + * (such as the pointer to regs, for example). + + * So here is what we do: + * We detect gdb attempting to store into the stack area and instead, store + * into the fn_call_lookaside.array at the same relative location as if it + * were the area ESP pointed at. We also trap ESP modifications + * and uses these to adjust fn_call_lookaside.esp. On entry + * fn_call_lookaside.esp will be set to point at the last entry in + * fn_call_lookaside.array. This allows us to check if it has changed, and + * if so, on exit, we add the registers we will use to do the move and a + * trap/ interrupt return exit sequence. We then adjust the eflags in the + * regs array (remember we now have a copy in the fn_call_lookaside.array) to + * kill the interrupt bit, AND we change EIP to point at our set up stub. + * As part of the register set up we preset the registers to point at the + * begining and end of the fn_call_lookaside.array, so all the stub needs to + * do is move words from the array to the stack until ESP= the desired value + * then do the rti. This will then transfer to the desired function with + * all the correct registers. Nifty huh? + */ +extern asmlinkage void fn_call_stub(void); +extern asmlinkage void fn_rtn_stub(void); +/* *INDENT-OFF* */ +__asm__("fn_rtn_stub:\n\t" + "movq %rax,%rsp\n\t" + "fn_call_stub:\n\t" + "1:\n\t" + "addq $-8,%rbx\n\t" + "movq (%rbx), %rax\n\t" + "pushq %rax\n\t" + "cmpq %rsp,%rcx\n\t" + "jne 1b\n\t" + "popq %rax\n\t" + "popq %rbx\n\t" + "popq %rcx\n\t" + "iret \n\t"); +/* *INDENT-ON* */ +#define gdb_i386vector kgdb_info.vector +#define gdb_i386errcode kgdb_info.errcode +#define waiting_cpus kgdb_info.cpus_waiting +#define remote_debug kgdb_info.print_debug_info +#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold +/* gdb locks */ + +#ifdef CONFIG_SMP +static int in_kgdb_called; +static spinlock_t waitlocks[MAX_NO_CPUS] = + {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; +/* + * The following array has the thread pointer of each of the "other" + * cpus. We make it global so it can be seen by gdb. + */ +volatile int in_kgdb_entry_log[MAX_NO_CPUS]; +volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; +/* +static spinlock_t continuelocks[MAX_NO_CPUS]; +*/ +spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; +/* waiters on our spinlock plus us */ +static atomic_t spinlock_waiters = ATOMIC_INIT(1); +static int spinlock_count = 0; +static int spinlock_cpu = 0; +/* + * Note we use nested spin locks to account for the case where a break + * point is encountered when calling a function by user direction from + * kgdb. Also there is the memory exception recursion to account for. + * Well, yes, but this lets other cpus thru too. Lets add a + * cpu id to the lock. + */ +#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ + spinlock_cpu != smp_processor_id()){\ + atomic_inc(&spinlock_waiters); \ + while (! spin_trylock(x)) {\ + in_kgdb(®s);\ + }\ + atomic_dec(&spinlock_waiters); \ + spinlock_count = 1; \ + spinlock_cpu = smp_processor_id(); \ + }else{ \ + spinlock_count++; \ + } +#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) +#else +unsigned kgdb_spinlock = 0; +#define KGDB_SPIN_LOCK(x) --*x +#define KGDB_SPIN_UNLOCK(x) ++*x +#endif + +int +hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return (ch - 'a' + 10); + if ((ch >= '0') && (ch <= '9')) + return (ch - '0'); + if ((ch >= 'A') && (ch <= 'F')) + return (ch - 'A' + 10); + return (-1); +} + +/* scan for the sequence $# */ +void +getpacket(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + char ch; + + do { + /* wait around for the start character, ignore all other characters */ + while ((ch = (getDebugChar() & 0x7f)) != '$') ; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < BUFMAX) { + ch = getDebugChar() & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(getDebugChar() & 0x7f) << 4; + xmitcsum += hex(getDebugChar() & 0x7f); + if ((remote_debug) && (checksum != xmitcsum)) { + printk + ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", + checksum, xmitcsum, buffer); + } + + if (checksum != xmitcsum) + putDebugChar('-'); /* failed checksum */ + else { + putDebugChar('+'); /* successful transfer */ + /* if a sequence char is present, reply the sequence ID */ + if (buffer[2] == ':') { + putDebugChar(buffer[0]); + putDebugChar(buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(buffer); + for (i = 3; i <= count; i++) + buffer[i - 3] = buffer[i]; + } + } + } + } while (checksum != xmitcsum); + + if (remote_debug) + printk("R:%s\n", buffer); + flushDebugChar(); +} + +/* send the packet in buffer. */ + +void +putpacket(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + /* $#. */ + + if (!kgdboe) { + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + putDebugChar(ch); + checksum += ch; + count += 1; + } + + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + + } while ((getDebugChar() & 0x7f) != '+'); + } else { + /* + * For udp, we can not transfer too much bytes once. + * We only transfer MAX_SEND_COUNT size bytes each time + */ + +#define MAX_SEND_COUNT 30 + + int send_count = 0, i = 0; + char send_buf[MAX_SEND_COUNT]; + + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + send_count = 0; + while ((ch = buffer[count])) { + if (send_count >= MAX_SEND_COUNT) { + for(i = 0; i < MAX_SEND_COUNT; i++) { + putDebugChar(send_buf[i]); + } + flushDebugChar(); + send_count = 0; + } else { + send_buf[send_count] = ch; + checksum += ch; + count ++; + send_count++; + } + } + for(i = 0; i < send_count; i++) + putDebugChar(send_buf[i]); + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + } while ((getDebugChar() & 0x7f) != '+'); + } +} + +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; +static char lbuf[BUFMAX]; +static short error; + +void +debug_error(char *format, char *parm) +{ + if (remote_debug) + printk(format, parm); +} + +static void +print_regs(struct pt_regs *regs) +{ + printk("RAX=%016lx RBX=%016lx RCX=%016lx\n", + regs->rax, regs->rbx, regs->rcx); + printk("RDX=%016lx RSI=%016lx RDI=%016lx\n", + regs->rdx, regs->rsi, regs->rdi); + printk("RBP=%016lx PS=%016lx PC=%016lx\n", + regs->rbp, regs->eflags, regs->rip); + printk("R8=%016lx R9=%016lx R10=%016lx\n", + regs->r8, regs->r9, regs->r10); + printk("R11=%016lx R12=%016lx R13=%016lx\n", + regs->r11, regs->r12, regs->r13); + printk("R14=%016lx R15=%016lx RSP=%016lx\n", + regs->r14, regs->r15, regs->rsp); +} + +#define NEW_esp fn_call_lookaside[trap_cpu].rsp + +static void +regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_RAX] = regs->rax; + gdb_regs[_RBX] = regs->rbx; + gdb_regs[_RCX] = regs->rcx; + gdb_regs[_RDX] = regs->rdx; + gdb_regs[_RSI] = regs->rsi; + gdb_regs[_RDI] = regs->rdi; + gdb_regs[_RBP] = regs->rbp; + gdb_regs[ _PS] = regs->eflags; + gdb_regs[ _PC] = regs->rip; + gdb_regs[ _R8] = regs->r8; + gdb_regs[ _R9] = regs->r9; + gdb_regs[_R10] = regs->r10; + gdb_regs[_R11] = regs->r11; + gdb_regs[_R12] = regs->r12; + gdb_regs[_R13] = regs->r13; + gdb_regs[_R14] = regs->r14; + gdb_regs[_R15] = regs->r15; + gdb_regs[_RSP] = regs->rsp; + + /* Note, as we are a debugging the kernel, we will always + * trap in kernel code, this means no priviledge change, + * and so the pt_regs structure is not completely valid. In a non + * privilege change trap, only EFLAGS, CS and EIP are put on the stack, + * SS and ESP are not stacked, this means that the last 2 elements of + * pt_regs is not valid (they would normally refer to the user stack) + * also, using regs+1 is no good because you end up will a value that is + * 2 longs (8) too high. This used to cause stepping over functions + * to fail, so my fix is to use the address of regs->esp, which + * should point at the end of the stack frame. Note I have ignored + * completely exceptions that cause an error code to be stacked, such + * as double fault. Stuart Hughes, Zentropix. + * original code: gdb_regs[_ESP] = (int) (regs + 1) ; + + * this is now done on entry and moved to OLD_esp (as well as NEW_esp). + */ +} + +static void +gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + regs->rax = gdb_regs[_RAX] ; + regs->rbx = gdb_regs[_RBX] ; + regs->rcx = gdb_regs[_RCX] ; + regs->rdx = gdb_regs[_RDX] ; + regs->rsi = gdb_regs[_RSI] ; + regs->rdi = gdb_regs[_RDI] ; + regs->rbp = gdb_regs[_RBP] ; + regs->eflags = gdb_regs[ _PS] ; + regs->rip = gdb_regs[ _PC] ; + regs->r8 = gdb_regs[ _R8] ; + regs->r9 = gdb_regs[ _R9] ; + regs->r10 = gdb_regs[ _R10] ; + regs->r11 = gdb_regs[ _R11] ; + regs->r12 = gdb_regs[ _R12] ; + regs->r13 = gdb_regs[ _R13] ; + regs->r14 = gdb_regs[ _R14] ; + regs->r15 = gdb_regs[ _R15] ; + #if 0 /* can't change these */ + regs->rsp = gdb_regs[_RSP] ; + regs->ss = gdb_regs[ _SS] ; + regs->fs = gdb_regs[_FS]; + regs->gs = gdb_regs[_GS]; +#endif +} /* gdb_regs_to_regs */ + +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched ((unsigned long) scheduling_functions_start_here) +#define last_sched ((unsigned long) scheduling_functions_end_here) + +int thread_list = 0; +extern void thread_return(void); + +void +get_gdb_regs(struct task_struct *p, struct pt_regs *regs, unsigned long *gdb_regs) +{ + unsigned long **rbp, *rsp, *rsp0, pc; + int count = 0; + IF_SMP(int i); + if (!p || p == current) { + regs_to_gdb_regs(gdb_regs, regs); + return; + } +#ifdef CONFIG_SMP + for (i = 0; i < MAX_NO_CPUS; i++) { + if (p == kgdb_info.cpus_waiting[i].task) { + regs_to_gdb_regs(gdb_regs, + kgdb_info.cpus_waiting[i].regs); + gdb_regs[_RSP] = + (unsigned long)&kgdb_info.cpus_waiting[i].regs->rsp; + + return; + } + } +#endif + memset(gdb_regs, 0, NUMREGBYTES); + rsp = (unsigned long *)p->thread.rsp; + rbp = (unsigned long **)rsp[0]; + rsp += 2; + gdb_regs[_PC] = (unsigned long)thread_return; + gdb_regs[_RBP] = (unsigned long)rbp; + gdb_regs[_RSP] = (unsigned long)rsp; + +/* + * This code is to give a more informative notion of where a process + * is waiting. It is used only when the user asks for a thread info + * list. If he then switches to the thread, s/he will find the task + * is in schedule, but a back trace should show the same info we come + * up with. This code was shamelessly purloined from process.c. It was + * then enhanced to provide more registers than simply the program + * counter. + */ + + if (!thread_list) { + return; + } + + if (p->state == TASK_RUNNING) + return; + rsp0 = (unsigned long *)p->thread.rsp0; + if (rsp < (unsigned long *) p->thread_info || rsp > rsp0) + return; + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + do { + if (*rbp < rsp || *rbp > rsp0) + break; + rbp = (unsigned long **)*rbp; + rsp = (unsigned long *)rbp; + pc = rsp[1]; + + if (pc < first_sched || pc >= last_sched) + break; + gdb_regs[_PC] = (unsigned long)pc; + gdb_regs[_RSP] = (unsigned long)rsp; + gdb_regs[_RBP] = (unsigned long)rbp; + } while (count++ < 16); + return; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* returns nonzero if any memory access fails. */ +int mem2hex( char* mem, char* buf, int count) +{ + int i; + unsigned char ch; + int ret = 0; + + for (i=0;i> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (ret) { + Dearly_printk("mem2hex: fault at accessing %p\n", mem); + } + return(ret); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return nonzero if any memory access fails. */ +int hex2mem( char* buf, char* mem, int count) +{ + int i; + unsigned char ch; + int ret = 0; + + for (i=0;i (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { + addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); + } + *addr = val; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* return a pointer to the last char put in buf (null) */ +/* If MAY_FAULT is non-zero, then we should set mem_err in response to + a fault; if zero treat a fault like any other fault in the stub. */ +char * +mem2hex(char *mem, char *buf, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + /* printk("%lx = ", mem) ; */ + + ch = get_char(mem++); + + /* printk("%02x\n", ch & 0xFF) ; */ + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault fetching from addr %lx\n", + (long) (mem - 1)); + *buf = 0; /* truncate buffer */ + return (buf); + } + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (may_fault) + mem_err_expected = 0; + return (buf); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return a pointer to the character AFTER the last byte written */ +/* NOTE: We use the may fault flag to also indicate if the write is to + * the registers (0) or "other" memory (!=0) + */ +char * +hex2mem(char *buf, char *mem, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + ch = hex(*buf++) << 4; + ch = ch + hex(*buf++); + set_char(mem++, ch, may_fault); + + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault storing to addr %lx\n", + (long) (mem - 1)); + return (mem); + } + } + if (may_fault) + mem_err_expected = 0; + return (mem); +} +#endif + +/**********************************************/ +/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ +/* RETURN NUMBER OF CHARS PROCESSED */ +/**********************************************/ +int +hexToLong(char **ptr, unsigned long *value) +{ + int numChars = 0; + int hexValue; + + *value = 0; + + while (**ptr) { + hexValue = hex(**ptr); + if (hexValue >= 0) { + *value = (*value << 4) | hexValue; + numChars++; + } else + break; + + (*ptr)++; + } + + return (numChars); +} + +#define stubhex(h) hex(h) +#ifdef old_thread_list + +static int +stub_unpack_int(char *buff, int fieldlength) +{ + int nibble; + int retval = 0; + + while (fieldlength) { + nibble = stubhex(*buff++); + retval |= nibble; + fieldlength--; + if (fieldlength) + retval = retval << 4; + } + return retval; +} +#endif +static char * +pack_hex_byte(char *pkt, int byte) +{ + *pkt++ = hexchars[(byte >> 4) & 0xf]; + *pkt++ = hexchars[(byte & 0xf)]; + return pkt; +} + +#define BUF_THREAD_ID_SIZE 16 + +static char * +pack_threadid(char *pkt, threadref * id) +{ + char *limit; + unsigned char *altid; + + altid = (unsigned char *) id; + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *altid++); + return pkt; +} + +#ifdef old_thread_list +static char * +unpack_byte(char *buf, int *value) +{ + *value = stub_unpack_int(buf, 2); + return buf + 2; +} + +static char * +unpack_threadid(char *inbuf, threadref * id) +{ + char *altref; + char *limit = inbuf + BUF_THREAD_ID_SIZE; + int x, y; + + altref = (char *) id; + + while (inbuf < limit) { + x = stubhex(*inbuf++); + y = stubhex(*inbuf++); + *altref++ = (x << 4) | y; + } + return inbuf; +} +#endif +void +int_to_threadref(threadref * id, int value) +{ + unsigned char *scan; + + scan = (unsigned char *) id; + { + int i = 4; + while (i--) + *scan++ = 0; + } + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} +int +int_to_hex_v(unsigned char * id, int value) +{ + unsigned char *start = id; + int shift; + int ch; + + for (shift = 28; shift >= 0; shift -= 4) { + if ((ch = (value >> shift) & 0xf) || (id != start)) { + *id = hexchars[ch]; + id++; + } + } + if (id == start) + *id++ = '0'; + return id - start; +} +#ifdef old_thread_list + +static int +threadref_to_int(threadref * ref) +{ + int i, value = 0; + unsigned char *scan; + + scan = (char *) ref; + scan += 4; + i = 4; + while (i-- > 0) + value = (value << 8) | ((*scan++) & 0xff); + return value; +} +#endif +static int +cmp_str(char *s1, char *s2, int count) +{ + while (count--) { + if (*s1++ != *s2++) + return 0; + } + return 1; +} + +#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ +extern struct task_struct *kgdb_get_idle(int cpu); +#define idle_task(cpu) kgdb_get_idle(cpu) +#else +#define idle_task(cpu) init_tasks[cpu] +#endif + +extern int kgdb_pid_init_done; + +struct task_struct * +getthread(int pid) +{ + struct task_struct *thread; + if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { + if (!cpu_online(pid - PID_MAX)) + return NULL; + + return idle_task(pid - PID_MAX); + } else { + /* + * find_task_by_pid is relatively safe all the time + * Other pid functions require lock downs which imply + * that we may be interrupting them (as we get here + * in the middle of most any lock down). + * Still we don't want to call until the table exists! + */ + if (kgdb_pid_init_done){ + thread = find_task_by_pid(pid); + if (thread) { + return thread; + } + } + } + return NULL; +} +/* *INDENT-OFF* */ +struct hw_breakpoint { + unsigned enabled; + unsigned type; + unsigned len; + unsigned long addr; +} breakinfo[4] = { {enabled:0}, + {enabled:0}, + {enabled:0}, + {enabled:0}}; +/* *INDENT-ON* */ +unsigned long hw_breakpoint_status; +void +correct_hw_break(void) +{ + int breakno; + int correctit; + int breakbit; + unsigned long dr7; + + asm volatile ("movq %%db7, %0\n":"=r" (dr7) + :); + /* *INDENT-OFF* */ + do { + unsigned long addr0, addr1, addr2, addr3; + asm volatile ("movq %%db0, %0\n" + "movq %%db1, %1\n" + "movq %%db2, %2\n" + "movq %%db3, %3\n" + :"=r" (addr0), "=r"(addr1), + "=r"(addr2), "=r"(addr3) + :); + } while (0); + /* *INDENT-ON* */ + correctit = 0; + for (breakno = 0; breakno < 3; breakno++) { + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + dr7 |= (((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << 16) << + (breakno << 2); + switch (breakno) { + case 0: + asm volatile ("movq %0, %%dr0\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 1: + asm volatile ("movq %0, %%dr1\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 2: + asm volatile ("movq %0, %%dr2\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 3: + asm volatile ("movq %0, %%dr3\n"::"r" + (breakinfo[breakno].addr)); + break; + } + } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + } + } + if (correctit) { + asm volatile ("movq %0, %%db7\n"::"r" (dr7)); + } +} + +int +remove_hw_break(unsigned breakno) +{ + if (!breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 0; + return 0; +} + +int +set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) +{ + if (breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 1; + breakinfo[breakno].type = type; + breakinfo[breakno].len = len; + breakinfo[breakno].addr = addr; + return 0; +} + +#ifdef CONFIG_SMP +static int in_kgdb_console = 0; + +int +in_kgdb(struct pt_regs *regs) +{ + unsigned long flags; + int cpu; + if (!kgdb_enabled) + return 0; + cpu = smp_processor_id(); + in_kgdb_called = 1; + if (!spin_is_locked(&kgdb_spinlock)) { + if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ + in_kgdb_console) { /* or we are doing slow i/o */ + return 1; + } + return 0; + } + + /* As I see it the only reason not to let all cpus spin on + * the same spin_lock is to allow selected ones to proceed. + * This would be a good thing, so we leave it this way. + * Maybe someday.... Done ! + + * in_kgdb() is called from an NMI so we don't pretend + * to have any resources, like printk() for example. + */ + + local_irq_save(flags); /* only local here, to avoid hanging */ + /* + * log arival of this cpu + * The NMI keeps on ticking. Protect against recurring more + * than once, and ignor the cpu that has the kgdb lock + */ + in_kgdb_entry_log[cpu]++; + in_kgdb_here_log[cpu] = regs; + if (cpu == spinlock_cpu || waiting_cpus[cpu].task) + goto exit_in_kgdb; + + /* + * For protection of the initilization of the spin locks by kgdb + * it locks the kgdb spinlock before it gets the wait locks set + * up. We wait here for the wait lock to be taken. If the + * kgdb lock goes away first?? Well, it could be a slow exit + * sequence where the wait lock is removed prior to the kgdb lock + * so if kgdb gets unlocked, we just exit. + */ + + while (spin_is_locked(&kgdb_spinlock) && + !spin_is_locked(waitlocks + cpu)) ; + if (!spin_is_locked(&kgdb_spinlock)) + goto exit_in_kgdb; + + waiting_cpus[cpu].task = current; + waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu); + waiting_cpus[cpu].regs = regs; + + spin_unlock_wait(waitlocks + cpu); + + /* + * log departure of this cpu + */ + waiting_cpus[cpu].task = 0; + waiting_cpus[cpu].pid = 0; + waiting_cpus[cpu].regs = 0; + correct_hw_break(); + exit_in_kgdb: + in_kgdb_here_log[cpu] = 0; + local_irq_restore(flags); + return 1; + /* + spin_unlock(continuelocks + smp_processor_id()); + */ +} + +void +smp__in_kgdb(struct pt_regs regs) +{ + ack_APIC_irq(); + in_kgdb(®s); +} +#else +int +in_kgdb(struct pt_regs *regs) +{ + return (kgdb_spinlock); +} +#endif + +void +printexceptioninfo(int exceptionNo, int errorcode, char *buffer) +{ + unsigned long dr6; + int i; + switch (exceptionNo) { + case 1: /* debug exception */ + break; + case 3: /* breakpoint */ + sprintf(buffer, "Software breakpoint"); + return; + default: + sprintf(buffer, "Details not available"); + return; + } + asm volatile ("movq %%db6, %0\n":"=r" (dr6) + :); + if (dr6 & 0x4000) { + sprintf(buffer, "Single step"); + return; + } + for (i = 0; i < 4; ++i) { + if (dr6 & (1 << i)) { + sprintf(buffer, "Hardware breakpoint %d", i); + return; + } + } + sprintf(buffer, "Unknown trap"); + return; +} + +/* + * The ThreadExtraInfo query allows us to pass an arbitrary string + * for display with the "info threads" command. + */ + +void +print_extra_info(task_t *p, char *buf) +{ + if (!p) { + sprintf(buf, "Invalid thread"); + return; + } + sprintf(buf, "0x%p %8d %4d %c %s", + (void *)p, p->parent->pid, + task_cpu(p), + (p->state == 0) ? (task_curr(p)?'R':'r') : + (p->state < 0) ? 'U' : + (p->state & TASK_UNINTERRUPTIBLE) ? 'D' : + (p->state & TASK_STOPPED || p->ptrace & PT_PTRACED) ? 'T' : + (p->state & (TASK_ZOMBIE | TASK_DEAD)) ? 'Z' : + (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?', + p->comm); +} + +/* + * This function does all command procesing for interfacing to gdb. + * + * NOTE: The INT nn instruction leaves the state of the interrupt + * enable flag UNCHANGED. That means that when this routine + * is entered via a breakpoint (INT 3) instruction from code + * that has interrupts enabled, then interrupts will STILL BE + * enabled when this routine is entered. The first thing that + * we do here is disable interrupts so as to prevent recursive + * entries and bothersome serial interrupts while we are + * trying to run the serial port in polled mode. + * + * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so + * it is always necessary to do a restore_flags before returning + * so as to let go of that lock. + */ +int +kgdb_handle_exception(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs) +{ + struct task_struct *usethread = NULL; + struct task_struct *thread_list_start = 0, *thread = NULL; + struct task_struct *p; + unsigned long addr, length; + unsigned long breakno, breaktype; + char *ptr; + unsigned long newPC; + threadref thref; + unsigned long threadid, tmpid; + int thread_min = PID_MAX + MAX_NO_CPUS; +#ifdef old_thread_list + int maxthreads; +#endif + int nothreads; + unsigned long flags; + unsigned long gdb_regs[NUMREGS]; + unsigned long dr6; + IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ +#define NO_NMI 1 +#define NO_SYNC 2 +#define regs (*linux_regs) + /* + * If the entry is not from the kernel then return to the Linux + * trap handler and let it process the interrupt normally. + */ + if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->cs)) { + printk("ignoring non-kernel exception\n"); + print_regs(®s); + return (0); + } + /* + * If we're using eth mode, set the 'mode' in the netdevice. + */ + + if (kgdboe) + netpoll_set_trap(1); + + local_irq_save(flags); + + /* Get kgdb spinlock */ + + KGDB_SPIN_LOCK(&kgdb_spinlock); + rdtscll(kgdb_info.entry_tsc); + /* + * We depend on this spinlock and the NMI watch dog to control the + * other cpus. They will arrive at "in_kgdb()" as a result of the + * NMI and will wait there for the following spin locks to be + * released. + */ +#ifdef CONFIG_SMP + +#if 0 + if (cpu_callout_map & ~MAX_CPU_MASK) { + printk("kgdb : too many cpus, possibly not mapped" + " in contiguous space, change MAX_NO_CPUS" + " in kgdb_stub and make new kernel.\n" + " cpu_callout_map is %lx\n", cpu_callout_map); + goto exit_just_unlock; + } +#endif + if (spinlock_count == 1) { + int time, end_time, dum; + int i; + int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) + }; + if (remote_debug) { + printk("kgdb : cpu %d entry, syncing others\n", + smp_processor_id()); + } + for (i = 0; i < MAX_NO_CPUS; i++) { + /* + * Use trylock as we may already hold the lock if + * we are holding the cpu. Net result is all + * locked. + */ + spin_trylock(&waitlocks[i]); + } + for (i = 0; i < MAX_NO_CPUS; i++) + cpu_logged_in[i] = 0; + /* + * Wait for their arrival. We know the watch dog is active if + * in_kgdb() has ever been called, as it is always called on a + * watchdog tick. + */ + rdtsc(dum, time); + end_time = time + 2; /* Note: we use the High order bits! */ + i = 1; + if (num_online_cpus() > 1) { + int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; + smp_send_nmi_allbutself(); + + while (i < num_online_cpus() && time != end_time) { + int j; + for (j = 0; j < MAX_NO_CPUS; j++) { + if (waiting_cpus[j].task && + waiting_cpus[j].task != NOCPU && + !cpu_logged_in[j]) { + i++; + cpu_logged_in[j] = 1; + if (remote_debug) { + printk + ("kgdb : cpu %d arrived at kgdb\n", + j); + } + break; + } else if (!waiting_cpus[j].task && + !cpu_online(j)) { + waiting_cpus[j].task = NOCPU; + cpu_logged_in[j] = 1; + waiting_cpus[j].hold = 1; + break; + } + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + + int wait = 100000; + while (wait--) ; + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + printk + ("kgdb : cpu %d stall" + " in in_kgdb\n", + j); + i++; + cpu_logged_in[j] = 1; + waiting_cpus[j].task = + (struct task_struct + *) 1; + } + } + } + + if (in_kgdb_entry_log[smp_processor_id()] > + (me_in_kgdb + 10)) { + break; + } + + rdtsc(dum, time); + } + if (i < num_online_cpus()) { + printk + ("kgdb : time out, proceeding without sync\n"); +#if 0 + printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", + waiting_cpus[0].task != 0, + waiting_cpus[1].task != 0); + printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", + cpu_logged_in[0], cpu_logged_in[1]); + printk + ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", + in_kgdb_here_log[0] != 0, + in_kgdb_here_log[1] != 0); +#endif + entry_state = NO_SYNC; + } else { +#if 0 + int ent = + in_kgdb_entry_log[smp_processor_id()] - + me_in_kgdb; + printk("kgdb : sync after %d entries\n", ent); +#endif + } + } else { + if (remote_debug) { + printk + ("kgdb : %d cpus, but watchdog not active\n" + "proceeding without locking down other cpus\n", + num_online_cpus()); + entry_state = NO_NMI; + } + } + } +#endif + + if (remote_debug) { + unsigned long *lp = (unsigned long *) &linux_regs; + + printk("handle_exception(exceptionVector=%d, " + "signo=%d, err_code=%d, linux_regs=%p)\n", + exceptionVector, signo, err_code, linux_regs); + if (debug_regs) { + print_regs(®s); + printk("Stk: %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[0], lp[1], lp[2], lp[3], + lp[4], lp[5], lp[6], lp[7]); + printk(" %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[8], lp[9], lp[10], lp[11], + lp[12], lp[13], lp[14], lp[15]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[16], lp[17], lp[18], lp[19], + lp[20], lp[21], lp[22], lp[23]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[24], lp[25], lp[26], lp[27], + lp[28], lp[29], lp[30], lp[31]); + } + } + + /* Disable hardware debugging while we are in kgdb */ + /* Get the debug register status register */ +/* *INDENT-OFF* */ + __asm__("movq %0,%%db7" + : /* no output */ + :"r"(0UL)); + + asm volatile ("movq %%db6, %0\n" + :"=r" (hw_breakpoint_status) + :); + +#if 0 +/* *INDENT-ON* */ + switch (exceptionVector) { + case 0: /* divide error */ + case 1: /* debug exception */ + case 2: /* NMI */ + case 3: /* breakpoint */ + case 4: /* overflow */ + case 5: /* bounds check */ + case 6: /* invalid opcode */ + case 7: /* device not available */ + case 8: /* double fault (errcode) */ + case 10: /* invalid TSS (errcode) */ + case 12: /* stack fault (errcode) */ + case 16: /* floating point error */ + case 17: /* alignment check (errcode) */ + default: /* any undocumented */ + break; + case 11: /* segment not present (errcode) */ + case 13: /* general protection (errcode) */ + case 14: /* page fault (special errcode) */ + case 19: /* cache flush denied */ + if (mem_err_expected) { + /* + * This fault occured because of the + * get_char or set_char routines. These + * two routines use either eax of edx to + * indirectly reference the location in + * memory that they are working with. + * For a page fault, when we return the + * instruction will be retried, so we + * have to make sure that these + * registers point to valid memory. + */ + mem_err = 1; /* set mem error flag */ + mem_err_expected = 0; + mem_err_cnt++; /* helps in debugging */ + /* make valid address */ + regs.eax = (long) &garbage_loc; + /* make valid address */ + regs.edx = (long) &garbage_loc; + if (remote_debug) + printk("Return after memory error: " + "mem_err_cnt=%d\n", mem_err_cnt); + if (debug_regs) + print_regs(®s); + goto exit_kgdb; + } + break; + } +#endif + if (remote_debug) + printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); + + gdb_i386vector = exceptionVector; + gdb_i386errcode = err_code; + kgdb_info.called_from = __builtin_return_address(0); +#ifdef CONFIG_SMP + /* + * OK, we can now communicate, lets tell gdb about the sync. + * but only if we had a problem. + */ + switch (entry_state) { + case NO_NMI: + to_gdb("NMI not active, other cpus not stopped\n"); + break; + case NO_SYNC: + to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); + default:; + } + +#endif +/* + * Set up the gdb function call area. + */ + trap_cpu = smp_processor_id(); + OLD_esp = NEW_esp = (unsigned long) (&linux_regs->rsp); + + IF_SMP(once_again:) + /* reply to host that an exception has occurred */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + + putpacket(remcomOutBuffer); + + while (1 == 1) { + error = 0; + remcomOutBuffer[0] = 0; + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + break; + case 'd': + remote_debug = !(remote_debug); /* toggle debug flag */ + printk("Remote debug %s\n", + remote_debug ? "on" : "off"); + break; + case 'g': /* return the value of the CPU registers */ + get_gdb_regs(usethread, ®s, gdb_regs); + mem2hex((char *) gdb_regs, + remcomOutBuffer, NUMREGBYTES); + break; + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem(&remcomInBuffer[1], + (char *) gdb_regs, NUMREGBYTES); + if (!usethread || usethread == current) { + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E00"); + } + break; + + case 'P':{ /* set the value of a single CPU register - + return OK */ + /* + * For some reason, gdb wants to talk about psudo + * registers (greater than 15). + */ + unsigned long regno; + + ptr = &remcomInBuffer[1]; + regs_to_gdb_regs(gdb_regs, ®s); + if ((!usethread || usethread == current) && + hexToLong(&ptr, ®no) && + *ptr++ == '=' && (regno >= 0)) { + if (regno >= NUMREGS) + break; + hex2mem(ptr, (char *) &gdb_regs[regno], + 8); + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + break; + } + strcpy(remcomOutBuffer, "E01"); + break; + } + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr, &addr) && + (*(ptr++) == ',') && (hexToLong(&ptr, &length))) { + ptr = 0; + /* + * hex doubles the byte count + */ + if (length > (BUFMAX / 2)) + length = BUFMAX / 2; + if (mem2hex((char *) addr, + remcomOutBuffer, length)) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } + } + + if (ptr) { + strcpy(remcomOutBuffer, "E01"); + debug_error + ("malformed read memory command: %s\n", + remcomInBuffer); + } + break; + + /* MAA..AA,LLLL: + Write LLLL bytes at address AA.AA return OK */ + case 'M': + /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr, &addr) && + (*(ptr++) == ',') && + (hexToLong(&ptr, &length)) && (*(ptr++) == ':')) { + if (hex2mem(ptr, (char *) addr, length)) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } else { + strcpy(remcomOutBuffer, "OK"); + } + + ptr = 0; + } + if (ptr) { + strcpy(remcomOutBuffer, "E02"); + debug_error + ("malformed write memory command: %s\n", + remcomInBuffer); + } + break; + case 'S': + remcomInBuffer[0] = 's'; + case 'C': + /* Csig;AA..AA where ;AA..AA is optional + * continue with signal + * Since signals are meaning less to us, delete that + * part and then fall into the 'c' code. + */ + ptr = &remcomInBuffer[1]; + length = 2; + while (*ptr && *ptr != ';') { + length++; + ptr++; + } + if (*ptr) { + do { + ptr++; + *(ptr - length++) = *ptr; + } while (*ptr); + } else { + remcomInBuffer[1] = 0; + } + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + /* D detach, reply OK and then continue */ + case 'c': + case 's': + case 'D': + + /* try to read optional parameter, + pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr, &addr)) { + if (remote_debug) + printk("Changing EIP to 0x%lx\n", addr); + + regs.rip = addr; + } + + newPC = regs.rip; + + /* clear the trace bit */ + regs.eflags &= 0xfffffeff; + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + regs.eflags |= 0x100; + + /* detach is a friendly version of continue. Note that + debugging is still enabled (e.g hit control C) + */ + if (remcomInBuffer[0] == 'D') { + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + } + + if (remote_debug) { + printk("Resuming execution\n"); + print_regs(®s); + } + asm volatile ("movq %%db6, %0\n":"=r" (dr6) + :); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno) && + (breakinfo[breakno].type == 0)) { + /* Set restore flag */ + regs.eflags |= 0x10000; + break; + } + } + } + + if (kgdboe) + netpoll_set_trap(0); + + correct_hw_break(); + asm volatile ("movq %0, %%db6\n"::"r" (0UL)); + goto exit_kgdb; + + /* kill the program */ + case 'k': /* do nothing */ + break; + + /* query */ + case 'q': + nothreads = 0; + switch (remcomInBuffer[1]) { + case 'f': + threadid = 1; + thread_list = 2; + thread_list_start = (usethread ? : current); + case 's': + if (!cmp_str(&remcomInBuffer[2], + "ThreadInfo", 10)) + break; + + remcomOutBuffer[nothreads++] = 'm'; + for (; threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + nothreads += int_to_hex_v( + &remcomOutBuffer[ + nothreads], + threadid); + if (thread_min > threadid) + thread_min = threadid; + remcomOutBuffer[ + nothreads] = ','; + nothreads++; + if (nothreads > BUFMAX - 10) + break; + } + } + if (remcomOutBuffer[nothreads - 1] == 'm') { + remcomOutBuffer[nothreads - 1] = 'l'; + } else { + nothreads--; + } + remcomOutBuffer[nothreads] = 0; + break; + +#ifdef old_thread_list /* Old thread info request */ + case 'L': + /* List threads */ + thread_list = 2; + thread_list_start = (usethread ? : current); + unpack_byte(remcomInBuffer + 3, &maxthreads); + unpack_threadid(remcomInBuffer + 5, &thref); + do { + int buf_thread_limit = + (BUFMAX - 22) / BUF_THREAD_ID_SIZE; + if (maxthreads > buf_thread_limit) { + maxthreads = buf_thread_limit; + } + } while (0); + remcomOutBuffer[0] = 'q'; + remcomOutBuffer[1] = 'M'; + remcomOutBuffer[4] = '0'; + pack_threadid(remcomOutBuffer + 5, &thref); + + /* If start flag set start at 0. */ + if (remcomInBuffer[2] == '1') + threadid = 0; + else + threadid = threadref_to_int(&thref); + for (nothreads = 0; + nothreads < maxthreads && + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(remcomOutBuffer + + 21 + + nothreads * 16, + &thref); + nothreads++; + if (thread_min > threadid) + thread_min = threadid; + } + } + + if (threadid == PID_MAX + MAX_NO_CPUS) { + remcomOutBuffer[4] = '1'; + } + pack_hex_byte(remcomOutBuffer + 2, nothreads); + remcomOutBuffer[21 + nothreads * 16] = '\0'; + break; +#endif + case 'C': + /* Current thread id */ + remcomOutBuffer[0] = 'Q'; + remcomOutBuffer[1] = 'C'; + threadid = current->pid; + if (!threadid) { + /* + * idle thread + */ + for (threadid = PID_MAX; + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + if (current == + idle_task(threadid - + PID_MAX)) + break; + } + } + int_to_threadref(&thref, threadid); + pack_threadid(remcomOutBuffer + 2, &thref); + remcomOutBuffer[18] = '\0'; + break; + + case 'E': + /* Print exception info */ + printexceptioninfo(exceptionVector, + err_code, remcomOutBuffer); + break; + case 'T': + ptr = &remcomInBuffer[0]; + if (strncmp(ptr, "qThreadExtraInfo,", + strlen("qThreadExtraInfo,")) == 0) { + ptr += strlen("qThreadExtraInfo,"); + hexToLong(&ptr, &tmpid); + p = getthread(tmpid); + print_extra_info(p, lbuf); + mem2hex(lbuf, remcomOutBuffer, + strlen(lbuf)); + } + break; +#if 0 + case 'T':{ + char * nptr; + /* Thread extra info */ + if (!cmp_str(&remcomInBuffer[2], + "hreadExtraInfo,", 15)) { + break; + } + ptr = &remcomInBuffer[17]; + hexToLong(&ptr, &threadid); + thread = getthread(threadid); + nptr = &thread->comm[0]; + length = 0; + ptr = &remcomOutBuffer[0]; + do { + length++; + ptr = pack_hex_byte(ptr, *nptr++); + } while (*nptr && length < 16); + /* + * would like that 16 to be the size of + * task_struct.comm but don't know the + * syntax.. + */ + *ptr = 0; + } +#endif + } + break; + + /* task related */ + case 'H': + switch (remcomInBuffer[1]) { + case 'g': + ptr = &remcomInBuffer[2]; + hexToLong(&ptr, &threadid); + thread = getthread(threadid); + if (!thread) { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + break; + } + /* + * Just in case I forget what this is all about, + * the "thread info" command to gdb causes it + * to ask for a thread list. It then switches + * to each thread and asks for the registers. + * For this (and only this) usage, we want to + * fudge the registers of tasks not on the run + * list (i.e. waiting) to show the routine that + * called schedule. Also, gdb, is a minimalist + * in that if the current thread is the last + * it will not re-read the info when done. + * This means that in this case we must show + * the real registers. So here is how we do it: + * Each entry we keep track of the min + * thread in the list (the last that gdb will) + * get info for. We also keep track of the + * starting thread. + * "thread_list" is cleared when switching back + * to the min thread if it is was current, or + * if it was not current, thread_list is set + * to 1. When the switch to current comes, + * if thread_list is 1, clear it, else do + * nothing. + */ + usethread = thread; + if ((thread_list == 1) && + (thread == thread_list_start)) { + thread_list = 0; + } + if (thread_list && (threadid == thread_min)) { + if (thread == thread_list_start) { + thread_list = 0; + } else { + thread_list = 1; + } + } + /* follow through */ + case 'c': + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcomInBuffer[1]; + hexToLong(&ptr, &threadid); + thread = getthread(threadid); + if (thread) { + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + if (thread_min > threadid) + thread_min = threadid; + } else { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + } + break; + + case 'Y': /* set up a hardware breakpoint */ + ptr = &remcomInBuffer[1]; + hexToLong(&ptr, &breakno); + ptr++; + hexToLong(&ptr, &breaktype); + ptr++; + hexToLong(&ptr, &length); + ptr++; + hexToLong(&ptr, &addr); + if (set_hw_break(breakno & 0x3, + breaktype & 0x3, + length & 0x3, addr) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + /* Remove hardware breakpoint */ + case 'y': + ptr = &remcomInBuffer[1]; + hexToLong(&ptr, &breakno); + if (remove_hw_break(breakno & 0x3) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + case 'r': /* reboot */ + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + /*to_gdb("Rebooting\n"); */ + /* triplefault no return from here */ + { + static long no_idt[2]; + __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); + BREAKPOINT; + } + + } /* switch */ + + /* reply to the request */ + putpacket(remcomOutBuffer); + } /* while(1==1) */ + /* + * reached by goto only. + */ + exit_kgdb: + /* + * Here is where we set up to trap a gdb function call. NEW_esp + * will be changed if we are trying to do this. We handle both + * adding and subtracting, thus allowing gdb to put grung on + * the stack which it removes later. + */ + if (NEW_esp != OLD_esp) { + unsigned long *ptr = END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) + ptr -= (OLD_esp - NEW_esp) / sizeof (unsigned long); + *--ptr = linux_regs->eflags; + *--ptr = linux_regs->cs; + *--ptr = linux_regs->rip; + *--ptr = linux_regs->rcx; + *--ptr = linux_regs->rbx; + *--ptr = linux_regs->rax; + linux_regs->rcx = NEW_esp - (sizeof (unsigned long) * 6); + linux_regs->rbx = (unsigned long) END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) { + linux_regs->rip = (unsigned long) fn_call_stub; + } else { + linux_regs->rip = (unsigned long) fn_rtn_stub; + linux_regs->rax = NEW_esp; + } + linux_regs->eflags &= ~(IF_BIT | TF_BIT); + } +#ifdef CONFIG_SMP + /* + * Release gdb wait locks + * Sanity check time. Must have at least one cpu to run. Also single + * step must not be done if the current cpu is on hold. + */ + if (spinlock_count == 1) { + int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; + int cpu_avail = 0; + int i; + + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!cpu_online(i)) + break; + if (!hold_cpu(i)) { + cpu_avail = 1; + } + } + /* + * Early in the bring up there will be NO cpus on line... + */ + if (!cpu_avail && !cpus_empty(cpu_online_map)) { + to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); + goto once_again; + } + if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { + to_gdb + ("Current cpu must be unblocked to single step\n"); + goto once_again; + } + if (!(ss_hold)) { + int i; + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!hold_cpu(i)) { + spin_unlock(&waitlocks[i]); + } + } + } else { + spin_unlock(&waitlocks[smp_processor_id()]); + } + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + /* + * If this cpu is on hold, this is where we + * do it. Note, the NMI will pull us out of here, + * but will return as the above lock is not held. + * We will stay here till another cpu releases the lock for us. + */ + spin_unlock_wait(waitlocks + smp_processor_id()); + local_irq_restore(flags); + return (1); + } +#if 0 +exit_just_unlock: +#endif +#endif + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + local_irq_restore(flags); + return (1); +} + +#undef regs +static int kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + struct die_args *d = ptr; + + if (!kgdb_enabled || (cmd == DIE_DEBUG && user_mode(d->regs))) + return NOTIFY_DONE; + if (cmd == DIE_NMI_IPI) { + if (in_kgdb(d->regs)) + return NOTIFY_BAD; + } else if (kgdb_handle_exception(d->trapnr, d->signr, d->err, d->regs)) + return NOTIFY_BAD; /* skip */ + + return NOTIFY_DONE; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, + .priority = 0, +}; + +void set_debug_traps(void) +{ + static int initialized = 0; + + if (!initialized) { + initialized = 1; + notifier_chain_register(&die_chain, &kgdb_notifier); + } +} + +/* + * Provide the command line "gdb" initial break + */ +int __init kgdb_initial_break(char * str) +{ + if (*str == '\0'){ + breakpoint(); + return 1; + } + return 0; +} +__setup("gdb",kgdb_initial_break); + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. */ +/* But really, just use the BREAKPOINT macro. We will handle the int stuff + */ + +void breakpoint(void) +{ + + set_debug_traps(); + kgdb_enabled = 1; +#if 0 + /* + * These calls were not enough to allow breakpoint to be + * called before trap_init(). I moved the argument parsing + * after trap_init() and it seems to work. + */ + set_intr_usr_gate(3,&int3); /* disable ints on trap */ + set_intr_gate(1,&debug); + set_intr_gate(14,&page_fault); +#endif + + BREAKPOINT; +} + +#ifdef later +/* + * possibly we should not go thru the traps.c code at all? Someday. + */ +void +do_kgdb_int3(struct pt_regs *regs, long error_code) +{ + kgdb_handle_exception(3, 5, error_code, regs); + return; +} +#endif +#undef regs +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS +asmlinkage void +bad_sys_call_exit(int stuff) +{ + struct pt_regs *regs = (struct pt_regs *) &stuff; + printk("Sys call %d return with %x preempt_count\n", + (int) regs->orig_eax, preempt_count()); +} +#endif +#ifdef CONFIG_STACK_OVERFLOW_TEST +#include +asmlinkage void +stack_overflow(void) +{ +#ifdef BREAKPOINT + BREAKPOINT; +#else + printk("Kernel stack overflow, looping forever\n"); +#endif + while (1) { + } +} +#endif + +#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) +char gdbconbuf[BUFMAX]; + +static void +kgdb_gdb_message(const char *s, unsigned count) +{ + int i; + int wcount; + char *bufptr; + /* + * This takes care of NMI while spining out chars to gdb + */ + IF_SMP(in_kgdb_console = 1); + gdbconbuf[0] = 'O'; + bufptr = gdbconbuf + 1; + while (count > 0) { + if ((count << 1) > (BUFMAX - 2)) { + wcount = (BUFMAX - 2) >> 1; + } else { + wcount = count; + } + count -= wcount; + for (i = 0; i < wcount; i++) { + bufptr = pack_hex_byte(bufptr, s[i]); + } + *bufptr = '\0'; + s += wcount; + + putpacket(gdbconbuf); + + } + IF_SMP(in_kgdb_console = 0); +} +#endif +#ifdef CONFIG_SMP +static void +to_gdb(const char *s) +{ + int count = 0; + while (s[count] && (count++ < BUFMAX)) ; + kgdb_gdb_message(s, count); +} +#endif +#ifdef CONFIG_KGDB_CONSOLE +#include +#include +#include +#include + +void +kgdb_console_write(struct console *co, const char *s, unsigned count) +{ + + if (gdb_i386vector == -1) { + /* + * We have not yet talked to gdb. What to do... + * lets break, on continue we can do the write. + * But first tell him whats up. Uh, well no can do, + * as this IS the console. Oh well... + * We do need to wait or the messages will be lost. + * Other option would be to tell the above code to + * ignore this breakpoint and do an auto return, + * but that might confuse gdb. Also this happens + * early enough in boot up that we don't have the traps + * set up yet, so... + */ + breakpoint(); + } + kgdb_gdb_message(s, count); +} + +/* + * ------------------------------------------------------------ + * Serial KGDB driver + * ------------------------------------------------------------ + */ + +static struct console kgdbcons = { + name:"kgdb", + write:kgdb_console_write, +#ifdef CONFIG_KGDB_USER_CONSOLE + device:kgdb_console_device, +#endif + flags:CON_PRINTBUFFER | CON_ENABLED, + index:-1, +}; + +/* + * The trick here is that this file gets linked before printk.o + * That means we get to peer at the console info in the command + * line before it does. If we are up, we register, otherwise, + * do nothing. By returning 0, we allow printk to look also. + */ +static int kgdb_console_enabled; + +int __init +kgdb_console_init(char *str) +{ + if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { + register_console(&kgdbcons); + kgdb_console_enabled = 1; + } + return 0; /* let others look at the string */ +} + +__setup("console=", kgdb_console_init); + +#ifdef CONFIG_KGDB_USER_CONSOLE +static kdev_t kgdb_console_device(struct console *c); +/* This stuff sort of works, but it knocks out telnet devices + * we are leaving it here in case we (or you) find time to figure it out + * better.. + */ + +/* + * We need a real char device as well for when the console is opened for user + * space activities. + */ + +static int +kgdb_consdev_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t +kgdb_consdev_write(struct file *file, const char *buf, + size_t count, loff_t * ppos) +{ + int size, ret = 0; + static char kbuf[128]; + static DECLARE_MUTEX(sem); + + /* We are not reentrant... */ + if (down_interruptible(&sem)) + return -ERESTARTSYS; + + while (count > 0) { + /* need to copy the data from user space */ + size = count; + if (size > sizeof (kbuf)) + size = sizeof (kbuf); + if (copy_from_user(kbuf, buf, size)) { + ret = -EFAULT; + break;; + } + kgdb_console_write(&kgdbcons, kbuf, size); + count -= size; + ret += size; + buf += size; + } + + up(&sem); + + return ret; +} + +struct file_operations kgdb_consdev_fops = { + open:kgdb_consdev_open, + write:kgdb_consdev_write +}; +static kdev_t +kgdb_console_device(struct console *c) +{ + return MKDEV(TTYAUX_MAJOR, 1); +} + +/* + * This routine gets called from the serial stub in the i386/lib + * This is so it is done late in bring up (just before the console open). + */ +void +kgdb_console_finit(void) +{ + if (kgdb_console_enabled) { + char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); + char *cp = cptr; + while (*cptr && *cptr != '(') + cptr++; + *cptr = 0; + unregister_chrdev(TTYAUX_MAJOR, cp); + register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); + } +} +#endif +#endif +#ifdef CONFIG_KGDB_TS +#include /* time stamp code */ +#include /* in_interrupt */ +#ifdef CONFIG_KGDB_TS_64 +#define DATA_POINTS 64 +#endif +#ifdef CONFIG_KGDB_TS_128 +#define DATA_POINTS 128 +#endif +#ifdef CONFIG_KGDB_TS_256 +#define DATA_POINTS 256 +#endif +#ifdef CONFIG_KGDB_TS_512 +#define DATA_POINTS 512 +#endif +#ifdef CONFIG_KGDB_TS_1024 +#define DATA_POINTS 1024 +#endif +#ifndef DATA_POINTS +#define DATA_POINTS 128 /* must be a power of two */ +#endif +#define INDEX_MASK (DATA_POINTS - 1) +#if (INDEX_MASK & DATA_POINTS) +#error "CONFIG_KGDB_TS_COUNT must be a power of 2" +#endif +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + int data0; + int data1; +}; +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + struct task_struct *t1; + struct task_struct *t2; +}; +struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; + +struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; +int kgdb_and_then_count; + +void +kgdb_tstamp(int line, char *source, int data0, int data1) +{ + static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; + int flags; + local_irq_save(flags); + spin_lock(&ts_spin); + rdtscll(kgdb_and_then->at_time); +#ifdef CONFIG_SMP + kgdb_and_then->on_cpu = smp_processor_id(); +#endif + kgdb_and_then->task = current; + kgdb_and_then->from_ln = line; + kgdb_and_then->in_src = source; + kgdb_and_then->from = __builtin_return_address(0); + kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | + (preempt_count() << 8)); + kgdb_and_then->data0 = data0; + kgdb_and_then->data1 = data1; + kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; + spin_unlock(&ts_spin); + local_irq_restore(flags); +#ifdef CONFIG_PREEMPT + +#endif + return; +} +#endif +typedef int gdb_debug_hook(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs); +gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ + +static int kgdb_need_breakpoint[NR_CPUS]; + +void kgdb_schedule_breakpoint(void) +{ + kgdb_need_breakpoint[smp_processor_id()] = 1; +} + +void kgdb_process_breakpoint(void) +{ + /* + * Handle a breakpoint queued from inside network driver code + * to avoid reentrancy issues + */ + if (kgdb_need_breakpoint[smp_processor_id()]) { + kgdb_need_breakpoint[smp_processor_id()] = 0; + kgdb_enabled = 1; + BREAKPOINT; + } +} + --- linux-2.6.3/arch/x86_64/kernel/Makefile 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/Makefile 2004-02-17 22:08:45.000000000 -0800 @@ -24,6 +24,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o a obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_KGDB) += kgdb_stub.o obj-y += topology.o --- linux-2.6.3/arch/x86_64/kernel/smp.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/x86_64/kernel/smp.c 2004-02-17 22:08:45.000000000 -0800 @@ -362,6 +362,18 @@ void smp_send_reschedule(int cpu) send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } +#ifdef CONFIG_KGDB +/* + * By using the NMI code instead of a vector we just sneak thru the + * word generator coming out with just what we want. AND it does + * not matter if clustered_apic_mode is set or not. + */ +void smp_send_nmi_allbutself(void) +{ + send_IPI_allbutself(APIC_DM_NMI); +} +#endif + /* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. --- linux-2.6.3/arch/x86_64/kernel/sys_x86_64.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/x86_64/kernel/sys_x86_64.c 2004-02-17 22:25:28.000000000 -0800 @@ -4,6 +4,7 @@ #include #include +#include #include #include #include --- linux-2.6.3/arch/x86_64/kernel/traps.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/x86_64/kernel/traps.c 2004-02-17 22:08:45.000000000 -0800 @@ -45,6 +45,9 @@ #include #include +#ifdef CONFIG_KGDB +#include +#endif extern struct gate_struct idt_table[256]; --- linux-2.6.3/arch/x86_64/kernel/x8664_ksyms.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/x86_64/kernel/x8664_ksyms.c 2004-02-17 22:25:22.000000000 -0800 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -180,8 +181,6 @@ EXPORT_SYMBOL_NOVERS(memcpy); EXPORT_SYMBOL_NOVERS(__memcpy); /* syscall export needed for misdesigned sound drivers. */ -extern ssize_t sys_read(unsigned int fd, char * buf, size_t count); -extern off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin); EXPORT_SYMBOL(sys_read); EXPORT_SYMBOL(sys_lseek); EXPORT_SYMBOL(sys_open); --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/x86_64/lib/kgdb_serial.c 2004-02-17 22:08:45.000000000 -0800 @@ -0,0 +1,490 @@ +/* + * Serial interface GDB stub + * + * Written (hacked together) by David Grothe (dave@gcom.com) + * Modified to allow invokation early in boot see also + * kgdb.h for instructions by George Anzinger(george@mvista.com) + * Modified to handle debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_KGDB_USER_CONSOLE +extern void kgdb_console_finit(void); +#endif +#define PRNT_off +#define TEST_EXISTANCE +#ifdef PRNT +#define dbprintk(s) printk s +#else +#define dbprintk(s) +#endif +#define TEST_INTERRUPT_off +#ifdef TEST_INTERRUPT +#define intprintk(s) printk s +#else +#define intprintk(s) +#endif + +#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) + +#define GDB_BUF_SIZE 512 /* power of 2, please */ + +static char gdb_buf[GDB_BUF_SIZE]; +static int gdb_buf_in_inx; +static atomic_t gdb_buf_in_cnt; +static int gdb_buf_out_inx; + +struct async_struct *gdb_async_info; +static int gdb_async_irq; + +#define outb_px(a,b) outb_p(b,a) + +static void program_uart(struct async_struct *info); +static void write_char(struct async_struct *info, int chr); +/* + * Get a byte from the hardware data buffer and return it + */ +static int +read_data_bfr(struct async_struct *info) +{ + char it = inb_p(info->port + UART_LSR); + + if (it & UART_LSR_DR) + return (inb_p(info->port + UART_RX)); + /* + * If we have a framing error assume somebody messed with + * our uart. Reprogram it and send '-' both ways... + */ + if (it & 0xc) { + program_uart(info); + write_char(info, '-'); + return ('-'); + } + return (-1); + +} /* read_data_bfr */ + +/* + * Get a char if available, return -1 if nothing available. + * Empty the receive buffer first, then look at the interface hardware. + + * Locking here is a bit of a problem. We MUST not lock out communication + * if we are trying to talk to gdb about a kgdb entry. ON the other hand + * we can loose chars in the console pass thru if we don't lock. It is also + * possible that we could hold the lock or be waiting for it when kgdb + * NEEDS to talk. Since kgdb locks down the world, it does not need locks. + * We do, of course have possible issues with interrupting a uart operation, + * but we will just depend on the uart status to help keep that straight. + + */ +static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; +#ifdef CONFIG_SMP +extern spinlock_t kgdb_spinlock; +#endif + +static int +read_char(struct async_struct *info) +{ + int chr; + unsigned long flags; + local_irq_save(flags); +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_lock(&uart_interrupt_lock); + } +#endif + if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ + chr = gdb_buf[gdb_buf_out_inx++]; + gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); + atomic_dec(&gdb_buf_in_cnt); + } else { + chr = read_data_bfr(info); + } +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_unlock(&uart_interrupt_lock); + } +#endif + local_irq_restore(flags); + return (chr); +} + +/* + * Wait until the interface can accept a char, then write it. + */ +static void +write_char(struct async_struct *info, int chr) +{ + while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; + + outb_p(chr, info->port + UART_TX); + +} /* write_char */ + +/* + * Mostly we don't need a spinlock, but since the console goes + * thru here with interrutps on, well, we need to catch those + * chars. + */ +/* + * This is the receiver interrupt routine for the GDB stub. + * It will receive a limited number of characters of input + * from the gdb host machine and save them up in a buffer. + * + * When the gdb stub routine tty_getDebugChar() is called it + * draws characters out of the buffer until it is empty and + * then reads directly from the serial port. + * + * We do not attempt to write chars from the interrupt routine + * since the stubs do all of that via tty_putDebugChar() which + * writes one byte after waiting for the interface to become + * ready. + * + * The debug stubs like to run with interrupts disabled since, + * after all, they run as a consequence of a breakpoint in + * the kernel. + * + * Perhaps someone who knows more about the tty driver than I + * care to learn can make this work for any low level serial + * driver. + */ +static irqreturn_t +gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct async_struct *info; + unsigned long flags; + + info = gdb_async_info; + if (!info || !info->tty || irq != gdb_async_irq) + return IRQ_NONE; + + local_irq_save(flags); + spin_lock(&uart_interrupt_lock); + do { + int chr = read_data_bfr(info); + intprintk(("Debug char on int: %x hex\n", chr)); + if (chr < 0) + continue; + + if (chr == 3) { /* Ctrl-C means remote interrupt */ + BREAKPOINT; + continue; + } + + if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { + /* buffer overflow tosses early char */ + read_char(info); + } + gdb_buf[gdb_buf_in_inx++] = chr; + gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); + } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); + spin_unlock(&uart_interrupt_lock); + local_irq_restore(flags); + return IRQ_HANDLED; +} /* gdb_interrupt */ + +/* + * Just a NULL routine for testing. + */ +void +gdb_null(void) +{ +} /* gdb_null */ + +/* These structure are filled in with values defined in asm/kgdb_local.h + */ +static struct serial_state state = SB_STATE; +static struct async_struct local_info = SB_INFO; +static int ok_to_enable_ints = 0; +static void kgdb_enable_ints_now(void); + +extern char *kgdb_version; +/* + * Hook an IRQ for KGDB. + * + * This routine is called from tty_putDebugChar, below. + */ +static int ints_disabled = 1; +int +gdb_hook_interrupt(struct async_struct *info, int verb) +{ + struct serial_state *state = info->state; + unsigned long flags; + int port; +#ifdef TEST_EXISTANCE + int scratch, scratch2; +#endif + + /* The above fails if memory managment is not set up yet. + * Rather than fail the set up, just keep track of the fact + * and pick up the interrupt thing later. + */ + gdb_async_info = info; + port = gdb_async_info->port; + gdb_async_irq = state->irq; + if (verb) { + printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", + kgdb_version, + port, + gdb_async_irq, gdb_async_info->state->custom_divisor); + } + local_irq_save(flags); +#ifdef TEST_EXISTANCE + /* Existance test */ + /* Should not need all this, but just in case.... */ + + scratch = inb_p(port + UART_IER); + outb_px(port + UART_IER, 0); + outb_px(0xff, 0x080); + scratch2 = inb_p(port + UART_IER); + outb_px(port + UART_IER, scratch); + if (scratch2) { + printk + ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); + local_irq_restore(flags); + return 1; /* We failed; there's nothing here */ + } + scratch2 = inb_p(port + UART_LCR); + outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ + outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ + outb_px(port + UART_LCR, 0); + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); + scratch = inb_p(port + UART_IIR) >> 6; + if (scratch == 1) { + printk("gdb_hook_interrupt: Undefined UART type!" + " Not a UART! \n"); + local_irq_restore(flags); + return 1; + } else { + dbprintk(("gdb_hook_interrupt: UART type " + "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); + } + scratch = inb_p(port + UART_MCR); + outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); + outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); + scratch2 = inb_p(port + UART_MSR) & 0xF0; + outb_px(port + UART_MCR, scratch); + if (scratch2 != 0x90) { + printk("gdb_hook_interrupt: " + "Loop back test failed! Not a UART!\n"); + local_irq_restore(flags); + return scratch2 + 1000; /* force 0 to fail */ + } +#endif /* test existance */ + program_uart(info); + local_irq_restore(flags); + + return (0); + +} /* gdb_hook_interrupt */ + +static void +program_uart(struct async_struct *info) +{ + int port = info->port; + + (void) inb_p(port + UART_RX); + outb_px(port + UART_IER, 0); + + (void) inb_p(port + UART_RX); /* serial driver comments say */ + (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ + (void) inb_p(port + UART_MSR); + outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); + outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ + outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ + outb_px(port + UART_MCR, info->MCR); + + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ + outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + return; +} + +/* + * tty_getDebugChar + * + * This is a GDB stub routine. It waits for a character from the + * serial interface and then returns it. If there is no serial + * interface connection then it returns a bogus value which will + * almost certainly cause the system to hang. In the + */ +int kgdb_in_isr = 0; +int kgdb_in_lsr = 0; +extern spinlock_t kgdb_spinlock; + +/* Caller takes needed protections */ + +int +tty_getDebugChar(void) +{ + volatile int chr, dum, time, end_time; + + dbprintk(("tty_getDebugChar(port %x): ", gdb_async_info->port)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + /* + * This trick says if we wait a very long time and get + * no char, return the -1 and let the upper level deal + * with it. + */ + rdtsc(dum, time); + end_time = time + 2; + while (((chr = read_char(gdb_async_info)) == -1) && + (end_time - time) > 0) { + rdtsc(dum, time); + }; + /* + * This covers our butts if some other code messes with + * our uart, hay, it happens :o) + */ + if (chr == -1) + program_uart(gdb_async_info); + + dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); + return (chr); + +} /* tty_getDebugChar */ + +static int count = 3; +static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; + +static int __init +kgdb_enable_ints(void) +{ + set_debug_traps(); + if (kgdboe) { + return 0; + } + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 1); + } + ok_to_enable_ints = 1; + kgdb_enable_ints_now(); +#ifdef CONFIG_KGDB_USER_CONSOLE + kgdb_console_finit(); +#endif + return 0; +} + +#ifdef CONFIG_SERIAL_8250 +void shutdown_for_kgdb(struct async_struct *gdb_async_info); +#endif + +#define kgdb_mem_init_done() (1) + +static void +kgdb_enable_ints_now(void) +{ + if (!spin_trylock(&one_at_atime)) + return; + if (!ints_disabled) + goto exit; + if (kgdb_mem_init_done() && + ints_disabled) { /* don't try till mem init */ +#ifdef CONFIG_SERIAL_8250 + /* + * The ifdef here allows the system to be configured + * without the serial driver. + * Don't make it a module, however, it will steal the port + */ + shutdown_for_kgdb(gdb_async_info); +#endif + ints_disabled = request_irq(gdb_async_info->state->irq, + gdb_interrupt, + IRQ_T(gdb_async_info), + "KGDB-stub", NULL); + intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); + } + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + exit: + spin_unlock(&one_at_atime); +} + +/* + * tty_putDebugChar + * + * This is a GDB stub routine. It waits until the interface is ready + * to transmit a char and then sends it. If there is no serial + * interface connection then it simply returns to its caller, having + * pretended to send the char. Caller takes needed protections. + */ +void +tty_putDebugChar(int chr) +{ + dbprintk(("tty_putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", + gdb_async_info->port, + chr, + chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + + write_char(gdb_async_info, chr); /* this routine will wait */ + count = (chr == '#') ? 0 : count + 1; + if ((count == 2)) { /* try to enable after */ + if (ints_disabled & ok_to_enable_ints) + kgdb_enable_ints_now(); /* try to enable after */ + + /* We do this a lot because, well we really want to get these + * interrupts. The serial driver will clear these bits when it + * initializes the chip. Every thing else it does is ok, + * but this. + */ + if (!ints_disabled) { + outb_px(gdb_async_info->port + UART_IER, + gdb_async_info->IER); + } + } + +} /* tty_putDebugChar */ + +/* + * This does nothing for the serial port, since it doesn't buffer. + */ + +void tty_flushDebugChar(void) +{ +} + +module_init(kgdb_enable_ints); --- linux-2.6.3/arch/x86_64/lib/Makefile 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/x86_64/lib/Makefile 2004-02-17 22:08:45.000000000 -0800 @@ -11,3 +11,4 @@ lib-y += memcpy.o memmove.o memset.o cop lib-$(CONFIG_IO_DEBUG) += iodebug.o lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +lib-$(CONFIG_KGDB) += kgdb_serial.o --- linux-2.6.3/arch/x86_64/Makefile 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/x86_64/Makefile 2004-02-17 22:25:07.000000000 -0800 @@ -52,7 +52,10 @@ CFLAGS += -Wno-sign-compare ifneq ($(CONFIG_DEBUG_INFO),y) CFLAGS += -fno-asynchronous-unwind-tables endif -#CFLAGS += $(call check_gcc,-funit-at-a-time,) + +# Enable unit-at-a-time mode when possible. It shrinks the +# kernel considerably. +CFLAGS += $(call check_gcc,-funit-at-a-time,) head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o --- linux-2.6.3/Documentation/binfmt_misc.txt 2003-10-08 15:07:08.000000000 -0700 +++ 25/Documentation/binfmt_misc.txt 2004-02-17 22:25:09.000000000 -0800 @@ -15,7 +15,7 @@ First you must mount binfmt_misc: mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc To actually register a new binary type, you have to set up a string looking like -:name:type:offset:magic:mask:interpreter: (where you can choose the ':' upon +:name:type:offset:magic:mask:interpreter:flags (where you can choose the ':' upon your needs) and echo it to /proc/sys/fs/binfmt_misc/register. Here is what the fields mean: - 'name' is an identifier string. A new /proc file will be created with this @@ -34,6 +34,28 @@ Here is what the fields mean: The mask is anded with the byte sequence of the file. - 'interpreter' is the program that should be invoked with the binary as first argument (specify the full path) + - 'flags' is an optional field that controls several aspects of the invocation + of the interpreter. It is a string of capital letters, each controls a certain + aspect. The following flags are supported - + 'P' - preserve-argv[0]. Legacy behavior of binfmt_misc is to overwrite the + original argv[0] with the full path to the binary. When this flag is + included, binfmt_misc will add an argument to the argument vector for + this purpose, thus preserving the original argv[0]. + 'O' - open-binary. Legacy behavior of binfmt_misc is to pass the full path + of the binary to the interpreter as an argument. When this flag is + included, binfmt_misc will open the file for reading and pass its + descriptor as an argument, instead of the full path, thus allowing + the interpreter to execute non-readable binaries. This feature should + be used with care - the interpreter has to be trusted not to emit + the contents of the non-readable binary. + 'C' - credentials. Currently, the behavior of binfmt_misc is to calculate + the credentials and security token of the new process according to + the interpreter. When this flag is included, these attributes are + calculated according to the binary. It also implies the 'O' flag. + This feature should be used with care as the interpreter + will run with root permissions when a setuid binary owned by root + is run with binfmt_misc. + There are some restrictions: - the whole register string may not exceed 255 characters @@ -83,9 +105,9 @@ If you want to pass special arguments to write a wrapper script for it. See Documentation/java.txt for an example. -Your interpreter should NOT look in the PATH for the filename; the -kernel passes it the full filename to use. Using the PATH can cause -unexpected behaviour and be a security hazard. +Your interpreter should NOT look in the PATH for the filename; the kernel +passes it the full filename (or the file descriptor) to use. Using $PATH can +cause unexpected behaviour and can be a security hazard. There is a web page about binfmt_misc at --- linux-2.6.3/Documentation/Changes 2004-02-17 20:48:41.000000000 -0800 +++ 25/Documentation/Changes 2004-02-17 22:25:10.000000000 -0800 @@ -216,13 +216,6 @@ chmod 0644 /dev/cpu/microcode as root before you can use this. You'll probably also want to get the user-space microcode_ctl utility to use with this. -If you have compiled the driver as a module you may need to add -the following line: - -alias char-major-10-184 microcode - -to your /etc/modules.conf file. - Powertweak ---------- @@ -259,17 +252,6 @@ mknod /dev/ppp c 108 0 as root. -If you build ppp support as modules, you will need the following in -your /etc/modules.conf file: - -alias char-major-108 ppp_generic -alias /dev/ppp ppp_generic -alias tty-ldisc-3 ppp_async -alias tty-ldisc-14 ppp_synctty -alias ppp-compress-21 bsd_comp -alias ppp-compress-24 ppp_deflate -alias ppp-compress-26 ppp_deflate - If you use devfsd and build ppp support as modules, you will need the following in your /etc/devfsd.conf file: --- linux-2.6.3/Documentation/CodingStyle 2003-07-13 21:44:34.000000000 -0700 +++ 25/Documentation/CodingStyle 2004-02-17 22:26:01.000000000 -0800 @@ -1,42 +1,75 @@ - Linux kernel coding style + Linux kernel coding style This is a short document describing the preferred coding style for the linux kernel. Coding style is very personal, and I won't _force_ my views on anybody, but this is what goes for anything that I have to be able to maintain, and I'd prefer it for most other things too. Please -at least consider the points made here. +at least consider the points made here. First off, I'd suggest printing out a copy of the GNU coding standards, -and NOT read it. Burn them, it's a great symbolic gesture. +and NOT read it. Burn them, it's a great symbolic gesture. Anyway, here goes: Chapter 1: Indentation -Tabs are 8 characters, and thus indentations are also 8 characters. +Tabs are 8 characters, and thus indentations are also 8 characters. There are heretic movements that try to make indentations 4 (or even 2!) characters deep, and that is akin to trying to define the value of PI to -be 3. +be 3. Rationale: The whole idea behind indentation is to clearly define where a block of control starts and ends. Especially when you've been looking at your screen for 20 straight hours, you'll find it a lot easier to see -how the indentation works if you have large indentations. +how the indentation works if you have large indentations. Now, some people will claim that having 8-character indentations makes the code move too far to the right, and makes it hard to read on a 80-character terminal screen. The answer to that is that if you need more than 3 levels of indentation, you're screwed anyway, and should fix -your program. +your program. In short, 8-char indents make things easier to read, and have the added -benefit of warning you when you're nesting your functions too deep. -Heed that warning. +benefit of warning you when you're nesting your functions too deep. +Heed that warning. +Don't put multiple statements on a single line unless you have +something to hide: - Chapter 2: Placing Braces + if (condition) do_this; + do_something_everytime; + +Outside of comments, documentation and except in Kconfig, spaces are never +used for indentation, and the above example is deliberately broken. + +Get a decent editor and don't leave whitespace at the end of lines. + + + Chapter 2: Breaking long lines and strings + +Coding style is all about readability and maintainability using commonly +available tools. + +The limit on the length of lines is 80 columns and this is a hard limit. + +Statements longer than 80 columns will be broken into sensible chunks. +Descendants are always substantially shorter than the parent and are placed +substantially to the right. The same applies to function headers with a long +argument list. Long strings are as well broken into shorter strings. + +void fun(int a, int b, int c) +{ + if (condition) + printk(KERN_WARNING "Warning this is a long printk with " + "3 parameters a: %u b: %u " + "c: %u \n", a, b, c); + else + next_statement; +} + + Chapter 3: Placing Braces The other issue that always comes up in C styling is the placement of braces. Unlike the indent size, there are few technical reasons to @@ -59,7 +92,7 @@ opening brace at the beginning of the ne Heretic people all over the world have claimed that this inconsistency is ... well ... inconsistent, but all right-thinking people know that (a) K&R are _right_ and (b) K&R are right. Besides, functions are -special anyway (you can't nest them in C). +special anyway (you can't nest them in C). Note that the closing brace is empty on a line of its own, _except_ in the cases where it is followed by a continuation of the same statement, @@ -79,60 +112,60 @@ and } else { .... } - -Rationale: K&R. + +Rationale: K&R. Also, note that this brace-placement also minimizes the number of empty (or almost empty) lines, without any loss of readability. Thus, as the supply of new-lines on your screen is not a renewable resource (think 25-line terminal screens here), you have more empty lines to put -comments on. +comments on. - Chapter 3: Naming + Chapter 4: Naming C is a Spartan language, and so should your naming be. Unlike Modula-2 and Pascal programmers, C programmers do not use cute names like ThisVariableIsATemporaryCounter. A C programmer would call that variable "tmp", which is much easier to write, and not the least more -difficult to understand. +difficult to understand. HOWEVER, while mixed-case names are frowned upon, descriptive names for global variables are a must. To call a global function "foo" is a -shooting offense. +shooting offense. GLOBAL variables (to be used only if you _really_ need them) need to have descriptive names, as do global functions. If you have a function that counts the number of active users, you should call that -"count_active_users()" or similar, you should _not_ call it "cntusr()". +"count_active_users()" or similar, you should _not_ call it "cntusr()". Encoding the type of a function into the name (so-called Hungarian notation) is brain damaged - the compiler knows the types anyway and can check those, and it only confuses the programmer. No wonder MicroSoft -makes buggy programs. +makes buggy programs. LOCAL variable names should be short, and to the point. If you have -some random integer loop counter, it should probably be called "i". +some random integer loop counter, it should probably be called "i". Calling it "loop_counter" is non-productive, if there is no chance of it being mis-understood. Similarly, "tmp" can be just about any type of -variable that is used to hold a temporary value. +variable that is used to hold a temporary value. If you are afraid to mix up your local variable names, you have another -problem, which is called the function-growth-hormone-imbalance syndrome. -See next chapter. +problem, which is called the function-growth-hormone-imbalance syndrome. +See next chapter. - - Chapter 4: Functions + + Chapter 5: Functions Functions should be short and sweet, and do just one thing. They should fit on one or two screenfuls of text (the ISO/ANSI screen size is 80x24, -as we all know), and do one thing and do that well. +as we all know), and do one thing and do that well. The maximum length of a function is inversely proportional to the complexity and indentation level of that function. So, if you have a conceptually simple function that is just one long (but simple) case-statement, where you have to do lots of small things for a lot of -different cases, it's OK to have a longer function. +different cases, it's OK to have a longer function. However, if you have a complex function, and you suspect that a less-than-gifted first-year high-school student might not even @@ -140,41 +173,78 @@ understand what the function is all abou maximum limits all the more closely. Use helper functions with descriptive names (you can ask the compiler to in-line them if you think it's performance-critical, and it will probably do a better job of it -than you would have done). +than you would have done). Another measure of the function is the number of local variables. They shouldn't exceed 5-10, or you're doing something wrong. Re-think the function, and split it into smaller pieces. A human brain can generally easily keep track of about 7 different things, anything more and it gets confused. You know you're brilliant, but maybe you'd like -to understand what you did 2 weeks from now. +to understand what you did 2 weeks from now. + + + Chapter 6: Centralized exiting of functions +Albeit deprecated by some people, the equivalent of the goto statement is +used frequently by compilers in form of the unconditional jump instruction. - Chapter 5: Commenting +The goto statement comes in handy when a function exits from multiple +locations and some common work such as cleanup has to be done. + +The rationale is: + +- unconditional statements are easier to understand and follow +- nesting is reduced +- errors by not updating individual exit points when making + modifications are prevented +- saves the compiler work to optimize redundant code away ;) + +int fun(int ) +{ + int result = 0; + char *buffer = kmalloc(SIZE); + + if (buffer == NULL) + return -ENOMEM; + + if (condition1) { + while (loop1) { + ... + } + result = 1; + goto out; + } + ... +out: + kfree(buffer); + return result; +} + + Chapter 7: Commenting Comments are good, but there is also a danger of over-commenting. NEVER try to explain HOW your code works in a comment: it's much better to write the code so that the _working_ is obvious, and it's a waste of -time to explain badly written code. +time to explain badly written code. -Generally, you want your comments to tell WHAT your code does, not HOW. +Generally, you want your comments to tell WHAT your code does, not HOW. Also, try to avoid putting comments inside a function body: if the function is so complex that you need to separately comment parts of it, -you should probably go back to chapter 4 for a while. You can make +you should probably go back to chapter 5 for a while. You can make small comments to note or warn about something particularly clever (or ugly), but try to avoid excess. Instead, put the comments at the head of the function, telling people what it does, and possibly WHY it does -it. +it. - Chapter 6: You've made a mess of it + Chapter 8: You've made a mess of it That's OK, we all do. You've probably been told by your long-time Unix user helper that "GNU emacs" automatically formats the C sources for you, and you've noticed that yes, it does do that, but the defaults it uses are less than desirable (in fact, they are worse than random typing - an infinite number of monkeys typing into GNU emacs would never -make a good program). +make a good program). So, you can either get rid of GNU emacs, or change it to use saner values. To do the latter, you can stick the following in your .emacs file: @@ -192,7 +262,7 @@ two lines, this mode will be automatical to add (setq auto-mode-alist (cons '("/usr/src/linux.*/.*\\.[ch]$" . linux-c-mode) - auto-mode-alist)) + auto-mode-alist)) to your .emacs file if you want to have linux-c-mode switched on automagically when you edit source files under /usr/src/linux. @@ -201,33 +271,36 @@ But even if you fail in getting emacs to everything is lost: use "indent". Now, again, GNU indent has the same brain-dead settings that GNU emacs -has, which is why you need to give it a few command line options. +has, which is why you need to give it a few command line options. However, that's not too bad, because even the makers of GNU indent recognize the authority of K&R (the GNU people aren't evil, they are just severely misguided in this matter), so you just give indent the -options "-kr -i8" (stands for "K&R, 8 character indents"). +options "-kr -i8" (stands for "K&R, 8 character indents"), or use +"scripts/Lindent", which indents in the latest style. "indent" has a lot of options, and especially when it comes to comment -re-formatting you may want to take a look at the manual page. But -remember: "indent" is not a fix for bad programming. +re-formatting you may want to take a look at the man page. But +remember: "indent" is not a fix for bad programming. - Chapter 7: Configuration-files + Chapter 9: Configuration-files -For configuration options (arch/xxx/config.in, and all the Config.in files), +For configuration options (arch/xxx/Kconfig, and all the Kconfig files), somewhat different indentation is used. -An indention level of 3 is used in the code, while the text in the config- -options should have an indention-level of 2 to indicate dependencies. The -latter only applies to bool/tristate options. For other options, just use -common sense. An example: - -if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - tristate 'Apply nitroglycerine inside the keyboard (DANGEROUS)' CONFIG_BOOM - if [ "$CONFIG_BOOM" != "n" ]; then - bool ' Output nice messages when you explode' CONFIG_CHEER - fi -fi +Help text is indented with 2 spaces. + +if CONFIG_EXPERIMENTAL + tristate CONFIG_BOOM + default n + help + Apply nitroglycerine inside the keyboard (DANGEROUS) + bool CONFIG_CHEER + depends on CONFIG_BOOM + default y + help + Output nice messages when you explode +endif Generally, CONFIG_EXPERIMENTAL should surround all options not considered stable. All options that are known to trash data (experimental write- @@ -235,20 +308,20 @@ support for file-systems, for instance) experimental options should be denoted (EXPERIMENTAL). - Chapter 8: Data structures + Chapter 10: Data structures Data structures that have visibility outside the single-threaded environment they are created and destroyed in should always have reference counts. In the kernel, garbage collection doesn't exist (and outside the kernel garbage collection is slow and inefficient), which -means that you absolutely _have_ to reference count all your uses. +means that you absolutely _have_ to reference count all your uses. Reference counting means that you can avoid locking, and allows multiple users to have access to the data structure in parallel - and not having to worry about the structure suddenly going away from under them just -because they slept or did something else for a while. +because they slept or did something else for a while. -Note that locking is _not_ a replacement for reference counting. +Note that locking is _not_ a replacement for reference counting. Locking is used to keep data structures coherent, while reference counting is a memory management technique. Usually both are needed, and they are not to be confused with each other. @@ -264,3 +337,87 @@ filesystem code ("struct super_block": s Remember: if another thread can find your data structure, and you don't have a reference count on it, you almost certainly have a bug. + + + Chapter 11: Macros, Enums, Inline functions and RTL + +Names of macros defining constants and labels in enums are capitalized. + +#define CONSTANT 0x12345 + +Enums are preferred when defining several related constants. + +CAPITALIZED macro names are appreciated but macros resembling functions +may be named in lower case. + +Generally, inline functions are preferable to macros resembling functions. + +Macros with multiple statements should be enclosed in a do - while block: + +#define macrofun(a,b,c) \ + do { \ + if (a == 5) \ + do_this(b,c); \ + } while (0) + +Things to avoid when using macros: + +1) macros that affect control flow: + +#define FOO(x) \ + do { \ + if (blah(x) < 0) \ + return -EBUGGERED; \ + } while(0) + +is a _very_ bad idea.  It looks like a function call but exits the "calling" +function; don't break the internal parsers of those who will read the code. + +2) macros that depend on having a local variable with a magic name: + +#define FOO(val) bar(index, val) + +might look like a good thing, but it's confusing as hell when one reads the +code and it's prone to breakage from seemingly innocent changes. + +3) macros with arguments that are used as l-values: FOO(x) = y; will +bite you if somebody e.g. turns FOO into an inline function. + +4) forgetting about precedence: macros defining constants using expressions +must enclose the expression in parentheses. Beware of similar issues with +macros using parameters. + +#define CONSTANT 0x4000 +#define CONSTEXP (CONSTANT | 3) + +The cpp manual deals with macros exhaustively. The gcc internals manual also +covers RTL which is used frequently with assembly language in the kernel. + + + Chapter 12: Printing kernel messages + +Kernel developers like to be seen as literate. Do mind the spelling +of kernel messages to make a good impression. Do not use crippled +words like "dont" and use "do not" or "don't" instead. + +Kernel messages do not have to be terminated with a period. + +Printing numbers in parentheses (%d) adds no value and should be avoided. + + + Chapter 13: References + +The C Programming Language, Second Edition +by Brian W. Kernighan and Dennis M. Ritchie. +Prentice Hall, Inc., 1988. +ISBN 0-13-110362-8 (paperback), 0-13-110370-9 (hardback). + +The Practice of Programming +Brian W. Kernighan, Rob Pike +Addison-Wesley, 1999, ISBN 0-201-61586-X + +GNU manuals - where in compliance with K&R and this text - for cpp, gcc, +gcc internals and indent, all available from www.gnu.org. + +-- +Last updated on 16 February 2004 by a community effort on LKML. --- linux-2.6.3/Documentation/computone.txt 2003-08-08 22:55:10.000000000 -0700 +++ 25/Documentation/computone.txt 2004-02-17 22:25:10.000000000 -0800 @@ -41,11 +41,11 @@ Hardware - If you have an ISA card, find Note the hardware address from the Computone ISA cards installed into the system. These are required for editing ip2.c or editing - /etc/modules.conf, or for specification on the modprobe + /etc/modprobe.conf, or for specification on the modprobe command line. - Note that the /etc/modules.conf file is named /etc/conf.modules - with older versions of the module utilities. + Note that the /etc/modules.conf should be used for older (pre-2.6) + kernels. Software - @@ -58,7 +58,7 @@ b) Run "make config" or "make menuconfig c) Set address on ISA cards then: edit /usr/src/linux/drivers/char/ip2.c if needed or - edit /etc/modules.conf if needed (module). + edit /etc/modprobe.conf if needed (module). or both to match this setting. d) Run "make modules" e) Run "make modules_install" @@ -145,11 +145,11 @@ the irqs are not specified the driver us selects polled mode). If no base addresses are specified the defaults in ip2.c are used. If you are autoloading the driver module with kerneld or kmod the base addresses and interrupt number must also be set in ip2.c -and recompile or just insert and options line in /etc/modules.conf or both. +and recompile or just insert and options line in /etc/modprobe.conf or both. The options line is equivalent to the command line and takes precidence over what is in ip2.c. -/etc/modules.conf sample: +/etc/modprobe.conf sample: options ip2 io=1,0x328 irq=1,10 alias char-major-71 ip2 alias char-major-72 ip2 --- linux-2.6.3/Documentation/crypto/api-intro.txt 2003-10-08 15:07:08.000000000 -0700 +++ 25/Documentation/crypto/api-intro.txt 2004-02-17 22:25:10.000000000 -0800 @@ -68,7 +68,7 @@ Many real examples are available in the CONFIGURATION NOTES As Triple DES is part of the DES module, for those using modular builds, -add the following line to /etc/modules.conf: +add the following line to /etc/modprobe.conf: alias des3_ede des --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/debugging-modules.txt 2004-02-17 22:25:49.000000000 -0800 @@ -0,0 +1,18 @@ +Debugging Modules after 2.6.3 +----------------------------- + +In almost all distributions, the kernel asks for modules which don't +exist, such as "net-pf-10" or whatever. Changing "modprobe -q" to +"succeed" in this case is hacky and breaks some setups, and also we +want to know if it failed for the fallback code for old aliases in +fs/char_dev.c, for example. + +In the past a debugging message which would fill people's logs was +emitted. This debugging message has been removed. The correct way +of debugging module problems is something like this: + +echo '#! /bin/sh' > /tmp/modprobe +echo 'echo "$@" >> /tmp/modprobe.log' >> /tmp/modprobe +echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe +chmod a+x /tmp/modprobe +echo /tmp/modprobe > /proc/sys/kernel/modprobe --- linux-2.6.3/Documentation/digiboard.txt 2003-06-14 12:18:09.000000000 -0700 +++ 25/Documentation/digiboard.txt 2004-02-17 22:25:10.000000000 -0800 @@ -24,7 +24,7 @@ The driver can be built direct into the The pcxx driver can be configured using the command line feature while loading the kernel with LILO or LOADLIN or, if built as a module, with arguments to insmod and modprobe or with parameters in -/etc/modules.conf for modprobe and kerneld. +/etc/modprobe.conf for modprobe and kerneld. After configuring the driver you need to create the device special files as described in "Device file creation:" below and set the appropriate @@ -91,13 +91,13 @@ devices following that board, you can em The remaining board still uses ttyD8-ttyD15 and cud8-cud15. -Example line for /etc/modules.conf for use with kerneld and as default +Example line for /etc/modprobe.conf for use with kerneld and as default parameters for modprobe: options pcxx io=0x200 numports=8 -For kerneld to work you will likely need to add these two lines to your -/etc/modules.conf: +For kmod to work you will likely need to add these two lines to your +/etc/modprobe.conf: alias char-major-22 pcxx alias char-major-23 pcxx --- linux-2.6.3/Documentation/fb/intel810.txt 2003-06-14 12:18:25.000000000 -0700 +++ 25/Documentation/fb/intel810.txt 2004-02-17 22:25:10.000000000 -0800 @@ -194,7 +194,7 @@ Using the same setup as described above, modprobe i810fb vram=2 xres=1024 bpp=8 hsync1=30 hsync2=55 vsync1=50 \ vsync2=85 accel=1 mtrr=1 -Or just add the following to /etc/modules.conf +Or just add the following to /etc/modprobe.conf options i810fb vram=2 xres=1024 bpp=16 hsync1=30 hsync2=55 vsync1=50 \ vsync2=85 accel=1 mtrr=1 --- linux-2.6.3/Documentation/filesystems/jfs.txt 2003-10-17 15:58:03.000000000 -0700 +++ 25/Documentation/filesystems/jfs.txt 2004-02-17 22:25:50.000000000 -0800 @@ -12,10 +12,9 @@ Barry Arndt barndt@us.ibm.com The following mount options are supported: iocharset=name Character set to use for converting from Unicode to - ASCII. The default is compiled into the kernel as - CONFIG_NLS_DEFAULT. Use iocharset=utf8 for UTF8 - translations. This requires CONFIG_NLS_UTF8 to be set - in the kernel .config file. + ASCII. The default is to do no conversion. Use + iocharset=utf8 for UTF8 translations. This requires + CONFIG_NLS_UTF8 to be set in the kernel .config file. resize=value Resize the volume to blocks. JFS only supports growing a volume, not shrinking it. This option is only @@ -36,18 +35,6 @@ errors=continue Keep going on a filesys errors=remount-ro Default. Remount the filesystem read-only on an error. errors=panic Panic and halt the machine if an error occurs. -JFS TODO list: - -Plans for our near term development items - - - enhance support for logfile on dedicated partition - -Longer term work items - - - implement defrag utility, for online defragmenting - - add quota support - - add support for block sizes (512,1024,2048) - Please send bugs, comments, cards and letters to shaggy@austin.ibm.com. The JFS mailing list can be subscribed to by using the link labeled --- linux-2.6.3/Documentation/filesystems/proc.txt 2003-11-09 16:45:05.000000000 -0800 +++ 25/Documentation/filesystems/proc.txt 2004-02-17 22:26:12.000000000 -0800 @@ -900,6 +900,15 @@ super-nr shows the number of currently a Every mounted file system needs a super block, so if you plan to mount lots of file systems, you may want to increase these numbers. +aio-nr and aio-max-nr +--------------------- + +aio-nr is the running total of the number of events specified on the +io_setup system call for all currently active aio contexts. If aio-nr +reaches aio-max-nr then io_setup will fail with EAGAIN. Note that +raising aio-max-nr does not result in the pre-allocation or re-sizing +of any kernel data structures. + 2.2 /proc/sys/fs/binfmt_misc - Miscellaneous binary formats ----------------------------------------------------------- --- linux-2.6.3/Documentation/ftape.txt 2003-10-08 15:07:08.000000000 -0700 +++ 25/Documentation/ftape.txt 2004-02-17 22:25:10.000000000 -0800 @@ -242,15 +242,15 @@ C. Boot and load time configuration Module parameters can be specified either directly when invoking the program 'insmod' at the shell prompt: - insmod ftape.o ft_tracing=4 + modprobe ftape ft_tracing=4 - or by editing the file `/etc/modules.conf' in which case they take + or by editing the file `/etc/modprobe.conf' in which case they take effect each time when the module is loaded with `modprobe' (please refer to the respective manual pages). Thus, you should add a line options ftape ft_tracing=4 - to `/etc/modules.conf` if you intend to increase the debugging + to `/etc/modprobe.conf` if you intend to increase the debugging output of the driver. @@ -298,7 +298,7 @@ C. Boot and load time configuration 5. Example module parameter setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To do the same, but with ftape compiled as a loadable kernel - module, add the following line to `/etc/modules.conf': + module, add the following line to `/etc/modprobe.conf': options ftape ft_probe_fc10=1 ft_tracing=4 --- linux-2.6.3/Documentation/hayes-esp.txt 2003-06-14 12:17:57.000000000 -0700 +++ 25/Documentation/hayes-esp.txt 2004-02-17 22:25:10.000000000 -0800 @@ -109,7 +109,7 @@ option with a space. For example: insmod esp dma=3 trigger=512 The esp module can be automatically loaded when needed. To cause this to -happen, add the following lines to /etc/modules.conf (replacing the last line +happen, add the following lines to /etc/modprobe.conf (replacing the last line with options for your configuration): alias char-major-57 esp --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/andthen 2004-02-17 22:08:43.000000000 -0800 @@ -0,0 +1,100 @@ + +define set_andthen + set var $thp=0 + set var $thp=(struct kgdb_and_then_struct *)&kgdb_data[0] + set var $at_size = (sizeof kgdb_data)/(sizeof *$thp) + set var $at_oc=kgdb_and_then_count + set var $at_cc=$at_oc +end + +define andthen_next + set var $at_cc=$arg0 +end + +define andthen + andthen_set_edge + if ($at_cc >= $at_oc) + printf "Outside window. Window size is %d\n",($at_oc-$at_low) + else + printf "%d: ",$at_cc + output *($thp+($at_cc++ % $at_size )) + printf "\n" + end +end +define andthen_set_edge + set var $at_oc=kgdb_and_then_count + set var $at_low = $at_oc - $at_size + if ($at_low < 0 ) + set var $at_low = 0 + end + if (( $at_cc > $at_oc) || ($at_cc < $at_low)) + printf "Count outside of window, setting count to " + if ($at_cc >= $at_oc) + set var $at_cc = $at_oc + else + set var $at_cc = $at_low + end + printf "%d\n",$at_cc + end +end + +define beforethat + andthen_set_edge + if ($at_cc <= $at_low) + printf "Outside window. Window size is %d\n",($at_oc-$at_low) + else + printf "%d: ",$at_cc-1 + output *($thp+(--$at_cc % $at_size )) + printf "\n" + end +end + +document andthen_next + andthen_next + . sets the number of the event to display next. If this event + . is not in the event pool, either andthen or beforethat will + . correct it to the nearest event pool edge. The event pool + . ends at the last event recorded and begins + . prior to that. If beforethat is used next, it will display + . event -1. +. + andthen commands are: set_andthen, andthen_next, andthen and beforethat +end + + +document andthen + andthen +. displays the next event in the list. sets up to display +. the oldest saved event first. +. (optional) count of the event to display. +. note the number of events saved is specified at configure time. +. if events are saved between calls to andthen the index will change +. but the displayed event will be the next one (unless the event buffer +. is overrun). +. +. andthen commands are: set_andthen, andthen_next, andthen and beforethat +end + +document set_andthen + set_andthen +. sets up to use the and commands. +. if you have defined your own struct, use the above and +. then enter the following: +. p $thp=(struct kgdb_and_then_structX *)&kgdb_data[0] +. where is the name of your structure. +. +. andthen commands are: set_andthen, andthen_next, andthen and beforethat +end + +document beforethat + beforethat +. displays the next prior event in the list. sets up to +. display the last occuring event first. +. +. note the number of events saved is specified at configure time. +. if events are saved between calls to beforethat the index will change +. but the displayed event will be the next one (unless the event buffer +. is overrun). +. +. andthen commands are: set_andthen, andthen_next, andthen and beforethat +end --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/debug-nmi.txt 2004-02-17 22:08:43.000000000 -0800 @@ -0,0 +1,37 @@ +Subject: Debugging with NMI +Date: Mon, 12 Jul 1999 11:28:31 -0500 +From: David Grothe +Organization: Gcom, Inc +To: David Grothe + +Kernel hackers: + +Maybe this is old hat, but it is new to me -- + +On an ISA bus machine, if you short out the A1 and B1 pins of an ISA +slot you will generate an NMI to the CPU. This interrupts even a +machine that is hung in a loop with interrupts disabled. Used in +conjunction with kgdb < +ftp://ftp.gcom.com/pub/linux/src/kgdb-2.3.35/kgdb-2.3.35.tgz > you can +gain debugger control of a machine that is hung in the kernel! Even +without kgdb the kernel will print a stack trace so you can find out +where it was hung. + +The A1/B1 pins are directly opposite one another and the farthest pins +towards the bracket end of the ISA bus socket. You can stick a paper +clip or multi-meter probe between them to short them out. + +I had a spare ISA bus to PC104 bus adapter around. The PC104 end of the +board consists of two rows of wire wrap pins. So I wired a push button +between the A1/B1 pins and now have an ISA board that I can stick into +any ISA bus slot for debugger entry. + +Microsoft has a circuit diagram of a PCI card at +http://www.microsoft.com/hwdev/DEBUGGING/DMPSW.HTM. If you want to +build one you will have to mail them and ask for the PAL equations. +Nobody makes one comercially. + +[THIS TIP COMES WITH NO WARRANTY WHATSOEVER. It works for me, but if +your machine catches fire, it is your problem, not mine.] + +-- Dave (the kgdb guy) --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdb-globals.txt 2004-02-17 22:08:43.000000000 -0800 @@ -0,0 +1,71 @@ +Sender: akale@veritas.com +Date: Fri, 23 Jun 2000 19:26:35 +0530 +From: "Amit S. Kale" +Organization: Veritas Software (India) +To: Dave Grothe , linux-kernel@vger.rutgers.edu +CC: David Milburn , + "Edouard G. Parmelan" , + ezannoni@cygnus.com, Keith Owens +Subject: Re: Module debugging using kgdb + +Dave Grothe wrote: +> +> Amit: +> +> There is a 2.4.0 version of kgdb on our ftp site: +> ftp://ftp.gcom.com/pub/linux/src/kgdb. I mirrored your version of gdb +> and loadmodule.sh there. +> +> Have a look at the README file and see if I go it right. If not, send +> me some corrections and I will update it. +> +> Does your version of gdb solve the global variable problem? + +Yes. +Thanks to Elena Zanoni, gdb (developement version) can now calculate +correctly addresses of dynamically loaded object files. I have not been +following gdb developement for sometime and am not sure when symbol +address calculation fix is going to appear in a gdb stable version. + +Elena, any idea when the fix will make it to a prebuilt gdb from a +redhat release? + +For the time being I have built a gdb developement version. It can be +used for module debugging with loadmodule.sh script. + +The problem with calculating of module addresses with previous versions +of gdb was as follows: +gdb did not use base address of a section while calculating address of +a symbol in the section in an object file loaded via 'add-symbol-file'. +It used address of .text segment instead. Due to this addresses of +symbols in .data, .bss etc. (e.g. global variables) were calculated incorrectly. + +Above mentioned fix allow gdb to use base address of a segment while +calculating address of a symbol in it. It adds a parameter '-s' to +'add-symbol-file' command for specifying base address of a segment. + +loadmodule.sh script works as follows. + +1. Copy a module file to target machine. +2. Load the module on the target machine using insmod with -m parameter. +insmod produces a module load map which contains base addresses of all +sections in the module and addresses of symbols in the module file. +3. Find all sections and their base addresses in the module from +the module map. +4. Generate a script that loads the module file. The script uses +'add-symbol-file' and specifies address of text segment followed by +addresses of all segments in the module. + +Here is an example gdb script produced by loadmodule.sh script. + +add-symbol-file foo 0xd082c060 -s .text.lock 0xd08cbfb5 +-s .fixup 0xd08cfbdf -s .rodata 0xd08cfde0 -s __ex_table 0xd08e3b38 +-s .data 0xd08e3d00 -s .bss 0xd08ec8c0 -s __ksymtab 0xd08ee838 + +With this command gdb can calculate addresses of symbols in ANY segment +in a module file. + +Regards. +-- +Amit Kale +Veritas Software ( http://www.veritas.com ) --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdbinit 2004-02-17 22:08:43.000000000 -0800 @@ -0,0 +1,14 @@ +shell echo -e "\003" >/dev/ttyS0 +set remotebaud 38400 +target remote /dev/ttyS0 +define si +stepi +printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx +printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp +x/i $eip +end +define ni +nexti +printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx +printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp +x/i $eip --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdbinit.hw 2004-02-17 22:08:43.000000000 -0800 @@ -0,0 +1,117 @@ + +#Using ia-32 hardware breakpoints. +# +#4 hardware breakpoints are available in ia-32 processors. These breakpoints +#do not need code modification. They are set using debug registers. +# +#Each hardware breakpoint can be of one of the +#three types: execution, write, access. +#1. An Execution breakpoint is triggered when code at the breakpoint address is +#executed. +#2. A write breakpoint ( aka watchpoints ) is triggered when memory location +#at the breakpoint address is written. +#3. An access breakpoint is triggered when memory location at the breakpoint +#address is either read or written. +# +#As hardware breakpoints are available in limited number, use software +#breakpoints ( br command in gdb ) instead of execution hardware breakpoints. +# +#Length of an access or a write breakpoint defines length of the datatype to +#be watched. Length is 1 for char, 2 short , 3 int. +# +#For placing execution, write and access breakpoints, use commands +#hwebrk, hwwbrk, hwabrk +#To remove a breakpoint use hwrmbrk command. +# +#These commands take following types of arguments. For arguments associated +#with each command, use help command. +#1. breakpointno: 0 to 3 +#2. length: 1 to 3 +#3. address: Memory location in hex ( without 0x ) e.g c015e9bc +# +#Use the command exinfo to find which hardware breakpoint occured. + +#hwebrk breakpointno address +define hwebrk + maintenance packet Y$arg0,0,0,$arg1 +end +document hwebrk + hwebrk
+ Places a hardware execution breakpoint + = 0 - 3 +
= Hex digits without leading "0x". +end + +#hwwbrk breakpointno length address +define hwwbrk + maintenance packet Y$arg0,1,$arg1,$arg2 +end +document hwwbrk + hwwbrk
+ Places a hardware write breakpoint + = 0 - 3 + = 1 (1 byte), 2 (2 byte), 3 (4 byte) +
= Hex digits without leading "0x". +end + +#hwabrk breakpointno length address +define hwabrk + maintenance packet Y$arg0,1,$arg1,$arg2 +end +document hwabrk + hwabrk
+ Places a hardware access breakpoint + = 0 - 3 + = 1 (1 byte), 2 (2 byte), 3 (4 byte) +
= Hex digits without leading "0x". +end + +#hwrmbrk breakpointno +define hwrmbrk + maintenance packet y$arg0 +end +document hwrmbrk + hwrmbrk + = 0 - 3 + Removes a hardware breakpoint +end + +define reboot + maintenance packet r +end +#exinfo +define exinfo + maintenance packet qE +end +document exinfo + exinfo + Gives information about a breakpoint. +end +define get_th + p $th=(struct thread_info *)((int)$esp & ~8191) +end +document get_th + get_tu + Gets and prints the current thread_info pointer, Defines th to be it. +end +define get_cu + p $cu=((struct thread_info *)((int)$esp & ~8191))->task +end +document get_cu + get_cu + Gets and print the "current" value. Defines $cu to be it. +end +define int_off + set var $flags=$eflags + set $eflags=$eflags&~0x200 + end +define int_on + set var $eflags|=$flags&0x200 + end +document int_off + saves the current interrupt state and clears the processor interrupt + flag. Use int_on to restore the saved flag. +end +document int_on + Restores the interrupt flag saved by int_off. +end --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdbinit-modules 2004-02-17 22:08:43.000000000 -0800 @@ -0,0 +1,146 @@ +# +# Usefull GDB user-command to debug Linux Kernel Modules with gdbstub. +# +# This don't work for Linux-2.0 or older. +# +# Author Edouard G. Parmelan +# +# +# Fri Apr 30 20:33:29 CEST 1999 +# First public release. +# +# Major cleanup after experiment Linux-2.0 kernel without success. +# Symbols of a module are not in the correct order, I can't explain +# why :( +# +# Fri Mar 19 15:41:40 CET 1999 +# Initial version. +# +# Thu Jan 6 16:29:03 CST 2000 +# A little fixing by Dave Grothe +# +# Mon Jun 19 09:33:13 CDT 2000 +# Alignment changes from Edouard Parmelan +# +# The basic idea is to find where insmod load the module and inform +# GDB to load the symbol table of the module with the GDB command +# ``add-symbol-file
''. +# +# The Linux kernel holds the list of all loaded modules in module_list, +# this list end with &kernel_module (exactly with module->next == NULL, +# but the last module is not a real module). +# +# Insmod allocates the struct module before the object file. Since +# Linux-2.1, this structure contain his size. The real address of +# the object file is then (char*)module + module->size_of_struct. +# +# You can use three user functions ``mod-list'', ``mod-print-symbols'' +# and ``add-module-symbols''. +# +# mod-list list all loaded modules with the format: +# +# +# As soon as you have found the address of your module, you can +# print its exported symbols (mod-print-symbols) or inform GDB to add +# symbols from your module file (mod-add-symbols). +# +# The argument that you give to mod-print-symbols or mod-add-symbols +# is the from the mod-list command. +# +# When using the mod-add-symbols command you must also give the full +# pathname of the modules object code file. +# +# The command mod-add-lis is an example of how to make this easier. +# You can edit this macro to contain the path name of your own +# favorite module and then use it as a shorthand to load it. You +# still need the module-address, however. +# +# The internal function ``mod-validate'' set the GDB variable $mod +# as a ``struct module*'' if the kernel known the module otherwise +# $mod is set to NULL. This ensure to not add symbols for a wrong +# address. +# +# Have a nice hacking day ! +# +# +define mod-list + set $mod = (struct module*)module_list + # the last module is the kernel, ignore it + while $mod != &kernel_module + printf "%p\t%s\n", (long)$mod, ($mod)->name + set $mod = $mod->next + end +end +document mod-list +List all modules in the form: +Use the as the argument for the other +mod-commands: mod-print-symbols, mod-add-symbols. +end + +define mod-validate + set $mod = (struct module*)module_list + while ($mod != $arg0) && ($mod != &kernel_module) + set $mod = $mod->next + end + if $mod == &kernel_module + set $mod = 0 + printf "%p is not a module\n", $arg0 + end +end +document mod-validate +mod-validate +Internal user-command used to validate the module parameter. +If is a real loaded module, set $mod to it otherwise set $mod to 0. +end + + +define mod-print-symbols + mod-validate $arg0 + if $mod != 0 + set $i = 0 + while $i < $mod->nsyms + set $sym = $mod->syms[$i] + printf "%p\t%s\n", $sym->value, $sym->name + set $i = $i + 1 + end + end +end +document mod-print-symbols +mod-print-symbols +Print all exported symbols of the module. see mod-list +end + + +define mod-add-symbols-align + mod-validate $arg0 + if $mod != 0 + set $mod_base = ($mod->size_of_struct + (long)$mod) + if ($arg2 != 0) && (($mod_base & ($arg2 - 1)) != 0) + set $mod_base = ($mod_base | ($arg2 - 1)) + 1 + end + add-symbol-file $arg1 $mod_base + end +end +document mod-add-symbols-align +mod-add-symbols-align +Load the symbols table of the module from the object file where +first section aligment is . +To retreive alignment, use `objdump -h '. +end + +define mod-add-symbols + mod-add-symbols-align $arg0 $arg1 sizeof(long) +end +document mod-add-symbols +mod-add-symbols +Load the symbols table of the module from the object file. +Default alignment is 4. See mod-add-symbols-align. +end + +define mod-add-lis + mod-add-symbols-align $arg0 /usr/src/LiS/streams.o 16 +end +document mod-add-lis +mod-add-lis +Does mod-add-symbols /usr/src/LiS/streams.o +end --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/kgdbeth.txt 2004-02-17 22:08:44.000000000 -0800 @@ -0,0 +1,92 @@ +KGDB over ethernet +================== + +Authors +------- + +Robert Walsh (2.6 port) +wangdi (2.6 port) +Matt Mackall (netpoll api) +San Mehat (original 2.4 code) + + +Introduction +------------ + +KGDB supports debugging over ethernet (kgdboe) via polling of a given +network interface. Most cards should be supported automatically. +Debugging facilities are available as soon as the network driver and +kgdboe have initialized. Unfortunately, this is too late in the boot +process for debugging some issues, but works quite well for many +others. This should not interfere with normal network usage and +doesn't require a dedicated NIC. + +Terminology +----------- + +This document uses the following terms: + + TARGET: the machine being debugged. + HOST: the machine running gdb. + + +Usage +----- + +You need to use the following command-line option on the TARGET kernel: + + kgdboe=[tgt-port]@/[dev],[host-port]@/[host-macaddr] + + where + tgt-port source for UDP packets (defaults to 6443) + tgt-ip source IP to use (interface address) + dev network interface (eth0) + host-port HOST UDP port (6442) (not really used) + host-ip IP address for HOST machine + host-macaddr ethernet MAC address for HOST (ff:ff:ff:ff:ff:ff) + + examples: + + kgdboe=7000@192.168.0.1/eth1,7001@192.168.0.2/00:05:3C:04:47:5D + this machine is 192.168.0.1 on eth1 + remote machine is 192.168.0.2 with MAC address 00:05:3C:04:47:5D + listen for gdb packets on port 7000 + send unsolicited gdb packets to port 7001 + + kgdboe=@192.168.0.1/,@192.168.0.2/ + this machine is 192.168.0.1 on default interface eth0 + remote machine is 192.168.0.2, use default broadcast MAC address + listen for gdb packets on default port 6443 + send unsolicited gdb packets to port 6442 + +Only packets originating from the configured HOST IP address will be +accepted by the debugger. + +On the HOST side, run gdb as normal and use a remote UDP host as the +target: + + % gdb ./vmlinux + GNU gdb Red Hat Linux (5.3post-0.20021129.18rh) + Copyright 2003 Free Software Foundation, Inc. + GDB is free software, covered by the GNU General Public License, and you are + welcome to change it and/or distribute copies of it under certain conditions. + Type "show copying" to see the conditions. + There is absolutely no warranty for GDB. Type "show warranty" for details. + This GDB was configured as "i386-redhat-linux-gnu"... + (gdb) target remote udp:HOSTNAME:6443 + +You can now continue as if you were debugging over a serial line. + +Limitations +----------- + +The current release of this code is exclusive of using kgdb on a +serial interface, so you must boot without the kgdboe option to use +serial debugging. Trying to debug the network driver while using it +will prove interesting. + +Bug reports +----------- + +Send bug reports to Robert Walsh and Matt +Mackall . --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/kgdb.txt 2004-02-17 22:08:43.000000000 -0800 @@ -0,0 +1,775 @@ +Last edit: <20030806.1637.12> +This file has information specific to the i386 kgdb option. Other +platforms with the kgdb option may behave in a similar fashion. + +New features: +============ +20030806.1557.37 +This version was made against the 2.6.0-test2 kernel. We have made the +following changes: + +- The getthread() code in the stub calls find_task_by_pid(). It fails + if we are early in the bring up such that the pid arrays have yet to + be allocated. We have added a line to kernel/pid.c to make + "kgdb_pid_init_done" true once the arrays are allocated. This way the + getthread() code knows not to call. This is only used by the thread + debugging stuff and threads will not yet exist at this point in the + boot. + +- For some reason, gdb was not asking for a new thread list when the + "info thread" command was given. We changed to the newer version of + the thread info command and gdb now seems to ask when needed. Result, + we now get all threads in the thread list. + +- We now respond to the ThreadExtraInfo request from gdb with the thread + name from task_struct .comm. This then appears in the thread list. + Thoughts on additional options for this are welcome. Things such as + "has BKL" and "Preempted" come to mind. I think we could have a flag + word that could enable different bits of info here. + +- We now honor, sort of, the C and S commands. These are continue and + single set after delivering a signal. We ignore the signal and do the + requested action. This only happens when we told gdb that a signal + was the reason for entry, which is only done on memory faults. The + result is that you can now continue into the Oops. + +- We changed the -g to -gdwarf-2. This seems to be the same as -ggdb, + but it is more exact on what language to use. + +- We added two dwarf2 include files and a bit of code at the end of + entry.S. This does not yet work, so it is disabled. Still we want to + keep track of the code and "maybe" someone out there can fix it. + +- Randy Dunlap sent some fix ups for this file which are now merged. + +- Hugh Dickins sent a fix to a bit of code in traps.c that prevents a + compiler warning if CONFIG_KGDB is off (now who would do that :). + +- Andrew Morton sent a fix for the serial driver which is now merged. + +- Andrew also sent a change to the stub around the cpu managment code + which is also merged. + +- Andrew also sent a patch to make "f" as well as "g" work as SysRq + commands to enter kgdb, merged. + +- If CONFIG_KGDB and CONFIG_DEBUG_SPINLOCKS are both set we added a + "who" field to the spinlock data struct. This is filled with + "current" when ever the spinlock suceeds. Useful if you want to know + who has the lock. + +_ And last, but not least, we fixed the "get_cu" macro to properly get + the current value of "current". + +New features: +============ +20030505.1827.27 +We are starting to align with the sourceforge version, at least in +commands. To this end, the boot command string to start kgdb at +boot time has been changed from "kgdb" to "gdb". + +Andrew Morton sent a couple of patches which are now included as follows: +1.) We now return a flag to the interrupt handler. +2.) We no longer use smp_num_cpus (a conflict with the lock meter). +3.) And from William Lee Irwin III code to make + sure high-mem is set up before we attempt to register our interrupt + handler. +We now include asm/kgdb.h from config.h so you will most likely never +have to include it. It also 'NULLS' the kgdb macros you might have in +your code when CONFIG_KGDB is not defined. This allows you to just +turn off CONFIG_KGDB to turn off all the kgdb_ts() calls and such. +This include is conditioned on the machine being an x86 so as to not +mess with other archs. + +20020801.1129.03 +This is currently the version for the 2.4.18 (and beyond?) kernel. + +We have several new "features" beginning with this version: + +1.) Kgdb now syncs the "other" CPUs with a cross-CPU NMI. No more + waiting and it will pull that guy out of an IRQ off spin lock :) + +2.) We doctored up the code that tells where a task is waiting and + included it so that the "info thread" command will show a bit more + than "schedule()". Try it... + +3.) Added the ability to call a function from gdb. All the standard gdb + issues apply, i.e. if you hit a breakpoint in the function, you are + not allowed to call another (gdb limitation, not kgdb). To help + this capability we added a memory allocation function. Gdb does not + return this memory (it is used for strings that you pass to that function + you are calling from gdb) so we fixed up a way to allow you to + manually return the memory (see below). + +4.) Kgdb time stamps (kgdb_ts()) are enhanced to expand what was the + interrupt flag to now also include the preemption count and the + "in_interrupt" info. The flag is now called "with_pif" to indicate + the order, preempt_count, in_interrupt, flag. The preempt_count is + shifted left by 4 bits so you can read the count in hex by dropping + the low order digit. In_interrupt is in bit 1, and the flag is in + bit 0. + +5.) The command: "p kgdb_info" is now expanded and prints something + like: +(gdb) p kgdb_info +$2 = {used_malloc = 0, called_from = 0xc0107506, entry_tsc = 67468627259, + errcode = 0, vector = 3, print_debug_info = 0, hold_on_sstep = 1, + cpus_waiting = {{task = 0xc027a000, pid = 32768, hold = 0, + regs = 0xc027bf84}, {task = 0x0, pid = 0, hold = 0, regs = 0x0}}} + + Things to note here: a.) used_malloc is the amount of memory that + has been malloc'ed to do calls from gdb. You can reclaim this + memory like this: "p kgdb_info.used_malloc=0" Cool, huh? b.) + cpus_waiting is now "sized" by the number of CPUs you enter at + configure time in the kgdb configure section. This is NOT used + anywhere else in the system, but it is "nice" here. c.) The task's + "pid" is now in the structure. This is the pid you will need to use + to decode to the thread id to get gdb to look at that thread. + Remember that the "info thread" command prints a list of threads + wherein it numbers each thread with its reference number followed + by the thread's pid. Note that the per-CPU idle threads actually + have pids of 0 (yes, there is more than one pid 0 in an SMP system). + To avoid confusion, kgdb numbers these threads with numbers beyond + the MAX_PID. That is why you see 32768 and above. + +6.) A subtle change, we now provide the complete register set for tasks + that are active on the other CPUs. This allows better trace back on + those tasks. + + And, let's mention what we could not fix. Back-trace from all but the + thread that we trapped will, most likely, have a bogus entry in it. + The problem is that gdb does not recognize the entry code for + functions that use "current" near (at all?) the entry. The compiler + is putting the "current" decode as the first two instructions of the + function where gdb expects to find %ebp changing code. Back trace + also has trouble with interrupt frames. I am talking with Daniel + Jacobowitz about some way to fix this, but don't hold your breath. + +20011220.0050.35 +Major enhancement with this version is the ability to hold one or more +CPUs in an SMP system while allowing the others to continue. Also, by +default only the current CPU is enabled on single-step commands (please +note that gdb issues single-step commands at times other than when you +use the si command). + +Another change is to collect some useful information in +a global structure called "kgdb_info". You should be able to just: + +p kgdb_info + +although I have seen cases where the first time this is done gdb just +prints the first member but prints the whole structure if you then enter +CR (carriage return or enter). This also works: + +p *&kgdb_info + +Here is a sample: +(gdb) p kgdb_info +$4 = {called_from = 0xc010732c, entry_tsc = 32804123790856, errcode = 0, + vector = 3, print_debug_info = 0} + +"Called_from" is the return address from the current entry into kgdb. +Sometimes it is useful to know why you are in kgdb, for example, was +it an NMI or a real breakpoint? The simple way to interrogate this +return address is: + +l *0xc010732c + +which will print the surrounding few lines of source code. + +"Entry_tsc" is the CPU TSC on entry to kgdb (useful to compare to the +kgdb_ts entries). + +"errcode" and "vector" are other entry parameters which may be helpful on +some traps. + +"print_debug_info" is the internal debugging kgdb print enable flag. Yes, +you can modify it. + +In SMP systems kgdb_info also includes the "cpus_waiting" structure and +"hold_on_step": + +(gdb) p kgdb_info +$7 = {called_from = 0xc0112739, entry_tsc = 1034936624074, errcode = 0, + vector = 2, print_debug_info = 0, hold_on_sstep = 1, cpus_waiting = {{ + task = 0x0, hold = 0, regs = 0x0}, {task = 0xc71b8000, hold = 0, + regs = 0xc71b9f70}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}}} + +"Cpus_waiting" has an entry for each CPU other than the current one that +has been stopped. Each entry contains the task_struct address for that +CPU, the address of the regs for that task and a hold flag. All these +have the proper typing so that, for example: + +p *kgdb_info.cpus_waiting[1].regs + +will print the registers for CPU 1. + +"Hold_on_sstep" is a new feature with this version and comes up set or +true. What this means is that whenever kgdb is asked to single-step all +other CPUs are held (i.e. not allowed to execute). The flag applies to +all but the current CPU and, again, can be changed: + +p kgdb_info.hold_on_sstep=0 + +restores the old behavior of letting all CPUs run during single-stepping. + +Likewise, each CPU has a "hold" flag, which if set, locks that CPU out +of execution. Note that this has some risk in cases where the CPUs need +to communicate with each other. If kgdb finds no CPU available on exit, +it will push a message thru gdb and stay in kgdb. Note that it is legal +to hold the current CPU as long as at least one CPU can execute. + +20010621.1117.09 +This version implements an event queue. Events are signaled by calling +a function in the kgdb stub and may be examined from gdb. See EVENTS +below for details. This version also tightens up the interrupt and SMP +handling to not allow interrupts on the way to kgdb from a breakpoint +trap. It is fine to allow these interrupts for user code, but not +system debugging. + +Version +======= + +This version of the kgdb package was developed and tested on +kernel version 2.4.16. It will not install on any earlier kernels. +It is possible that it will continue to work on later versions +of 2.4 and then versions of 2.5 (I hope). + + +Debugging Setup +=============== + +Designate one machine as the "development" machine. This is the +machine on which you run your compiles and which has your source +code for the kernel. Designate a second machine as the "target" +machine. This is the machine that will run your experimental +kernel. + +The two machines will be connected together via a serial line out +one or the other of the COM ports of the PC. You will need the +appropriate modem eliminator (null modem) cable(s) for this. + +Decide on which tty port you want the machines to communicate, then +connect them up back-to-back using the null modem cable. COM1 is +/dev/ttyS0 and COM2 is /dev/ttyS1. You should test this connection +with the two machines prior to trying to debug a kernel. Once you +have it working, on the TARGET machine, enter: + +setserial /dev/ttyS0 (or what ever tty you are using) + +and record the port address and the IRQ number. + +On the DEVELOPMENT machine you need to apply the patch for the kgdb +hooks. You have probably already done that if you are reading this +file. + +On your DEVELOPMENT machine, go to your kernel source directory and do +"make Xconfig" where X is one of "x", "menu", or "". If you are +configuring in the standard serial driver, it must not be a module. +Either yes or no is ok, but making the serial driver a module means it +will initialize after kgdb has set up the UART interrupt code and may +cause a failure of the control-C option discussed below. The configure +question for the serial driver is under the "Character devices" heading +and is: + +"Standard/generic (8250/16550 and compatible UARTs) serial support" + +Go down to the kernel debugging menu item and open it up. Enable the +kernel kgdb stub code by selecting that item. You can also choose to +turn on the "-ggdb -O1" compile options. The -ggdb causes the compiler +to put more debug info (like local symbols) in the object file. On the +i386 -g and -ggdb are the same so this option just reduces to "O1". The +-O1 reduces the optimization level. This may be helpful in some cases, +be aware, however, that this may also mask the problem you are looking +for. + +The baud rate. Default is 115200. What ever you choose be sure that +the host machine is set to the same speed. I recommend the default. + +The port. This is the I/O address of the serial UART that you should +have gotten using setserial as described above. The standard COM1 port +(3f8) using IRQ 4 is default. COM2 is 2f8 which by convention uses IRQ +3. + +The port IRQ (see above). + +Stack overflow test. This option makes a minor change in the trap, +system call and interrupt code to detect stack overflow and transfer +control to kgdb if it happens. (Some platforms have this in the +baseline code, but the i386 does not.) + +You can also configure the system to recognize the boot option +"console=kgdb" which if given will cause all console output during +booting to be put thru gdb as well as other consoles. This option +requires that gdb and kgdb be connected prior to sending console output +so, if they are not, a breakpoint is executed to force the connection. +This will happen before any kernel output (it is going thru gdb, right), +and will stall the boot until the connection is made. + +You can also configure in a patch to SysRq to enable the kGdb SysRq. +This request generates a breakpoint. Since the serial port IRQ line is +set up after any serial drivers, it is possible that this command will +work when the control-C will not. + +Save and exit the Xconfig program. Then do "make clean" , "make dep" +and "make bzImage" (or whatever target you want to make). This gets the +kernel compiled with the "-g" option set -- necessary for debugging. + +You have just built the kernel on your DEVELOPMENT machine that you +intend to run on your TARGET machine. + +To install this new kernel, use the following installation procedure. +Remember, you are on the DEVELOPMENT machine patching the kernel source +for the kernel that you intend to run on the TARGET machine. + +Copy this kernel to your target machine using your usual procedures. I +usually arrange to copy development: +/usr/src/linux/arch/i386/boot/bzImage to /vmlinuz on the TARGET machine +via a LAN based NFS access. That is, I run the cp command on the target +and copy from the development machine via the LAN. Run Lilo (see "man +lilo" for details on how to set this up) on the new kernel on the target +machine so that it will boot! Then boot the kernel on the target +machine. + +On the DEVELOPMENT machine, create a file called .gdbinit in the +directory /usr/src/linux. An example .gdbinit file looks like this: + +shell echo -e "\003" >/dev/ttyS0 +set remotebaud 38400 (or what ever speed you have chosen) +target remote /dev/ttyS0 + + +Change the "echo" and "target" definition so that it specifies the tty +port that you intend to use. Change the "remotebaud" definition to +match the data rate that you are going to use for the com line. + +You are now ready to try it out. + +Boot your target machine with "kgdb" in the boot command i.e. something +like: + +lilo> test kgdb + +or if you also want console output thru gdb: + +lilo> test kgdb console=kgdb + +You should see the lilo message saying it has loaded the kernel and then +all output stops. The kgdb stub is trying to connect with gdb. Start +gdb something like this: + + +On your DEVELOPMENT machine, cd /usr/src/linux and enter "gdb vmlinux". +When gdb gets the symbols loaded it will read your .gdbinit file and, if +everything is working correctly, you should see gdb print out a few +lines indicating that a breakpoint has been taken. It will actually +show a line of code in the target kernel inside the kgdb activation +code. + +The gdb interaction should look something like this: + + linux-dev:/usr/src/linux# gdb vmlinux + GDB is free software and you are welcome to distribute copies of it + under certain conditions; type "show copying" to see the conditions. + There is absolutely no warranty for GDB; type "show warranty" for details. + GDB 4.15.1 (i486-slackware-linux), + Copyright 1995 Free Software Foundation, Inc... + breakpoint () at i386-stub.c:750 + 750 } + (gdb) + +You can now use whatever gdb commands you like to set breakpoints. +Enter "continue" to start your target machine executing again. At this +point the target system will run at full speed until it encounters +your breakpoint or gets a segment violation in the kernel, or whatever. + +If you have the kgdb console enabled when you continue, gdb will print +out all the console messages. + +The above example caused a breakpoint relatively early in the boot +process. For the i386 kgdb it is possible to code a break instruction +as the first C-language point in init/main.c, i.e. as the first instruction +in start_kernel(). This could be done as follows: + +#include + breakpoint(); + +This breakpoint() is really a function that sets up the breakpoint and +single-step hardware trap cells and then executes a breakpoint. Any +early hard coded breakpoint will need to use this function. Once the +trap cells are set up they need not be set again, but doing it again +does not hurt anything, so you don't need to be concerned about which +breakpoint is hit first. Once the trap cells are set up (and the kernel +sets them up in due course even if breakpoint() is never called) the +macro: + +BREAKPOINT; + +will generate an inline breakpoint. This may be more useful as it stops +the processor at the instruction instead of in a function a step removed +from the location of interest. In either case must be +included to define both breakpoint() and BREAKPOINT. + +Triggering kgdbstub at other times +================================== + +Often you don't need to enter the debugger until much later in the boot +or even after the machine has been running for some time. Once the +kernel is booted and interrupts are on, you can force the system to +enter the debugger by sending a control-C to the debug port. This is +what the first line of the recommended .gdbinit file does. This allows +you to start gdb any time after the system is up as well as when the +system is already at a breakpoint. (In the case where the system is +already at a breakpoint the control-C is not needed, however, it will +be ignored by the target so no harm is done. Also note the the echo +command assumes that the port speed is already set. This will be true +once gdb has connected, but it is best to set the port speed before you +run gdb.) + +Another simple way to do this is to put the following file in you ~/bin +directory: + +#!/bin/bash +echo -e "\003" > /dev/ttyS0 + +Here, the ttyS0 should be replaced with what ever port you are using. +The "\003" is control-C. Once you are connected with gdb, you can enter +control-C at the command prompt. + +An alternative way to get control to the debugger is to enable the kGdb +SysRq command. Then you would enter Alt-SysRq-g (all three keys at the +same time, but push them down in the order given). To refresh your +memory of the available SysRq commands try Alt-SysRq-=. Actually any +undefined command could replace the "=", but I like to KNOW that what I +am pushing will never be defined. + +Debugging hints +=============== + +You can break into the target machine at any time from the development +machine by typing ^C (see above paragraph). If the target machine has +interrupts enabled this will stop it in the kernel and enter the +debugger. + +There is unfortunately no way of breaking into the kernel if it is +in a loop with interrupts disabled, so if this happens to you then +you need to place exploratory breakpoints or printk's into the kernel +to find out where it is looping. The exploratory breakpoints can be +entered either thru gdb or hard coded into the source. This is very +handy if you do something like: + +if () BREAKPOINT; + + +There is a copy of an e-mail in the Documentation/i386/kgdb/ directory +(debug-nmi.txt) which describes how to create an NMI on an ISA bus +machine using a paper clip. I have a sophisticated version of this made +by wiring a push button switch into a PC104/ISA bus adapter card. The +adapter card nicely furnishes wire wrap pins for all the ISA bus +signals. + +When you are done debugging the kernel on the target machine it is a +good idea to leave it in a running state. This makes reboots faster, +bypassing the fsck. So do a gdb "continue" as the last gdb command if +this is possible. To terminate gdb itself on the development machine +and leave the target machine running, first clear all breakpoints and +continue, then type ^Z to suspend gdb and then kill it with "kill %1" or +something similar. + +If gdbstub Does Not Work +======================== + +If it doesn't work, you will have to troubleshoot it. Do the easy +things first like double checking your cabling and data rates. You +might try some non-kernel based programs to see if the back-to-back +connection works properly. Just something simple like cat /etc/hosts +>/dev/ttyS0 on one machine and cat /dev/ttyS0 on the other will tell you +if you can send data from one machine to the other. Make sure it works +in both directions. There is no point in tearing out your hair in the +kernel if the line doesn't work. + +All of the real action takes place in the file +/usr/src/linux/arch/i386/kernel/kgdb_stub.c. That is the code on the target +machine that interacts with gdb on the development machine. In gdb you can +turn on a debug switch with the following command: + + set remotedebug + +This will print out the protocol messages that gdb is exchanging with +the target machine. + +Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c. This is +the code that talks to the serial port on the target side. There might +be a problem there. In particular there is a section of this code that +tests the UART which will tell you what UART you have if you define +"PRNT" (just remove "_off" from the #define PRNT_off). To view this +report you will need to boot the system without any beakpoints. This +allows the kernel to run to the point where it calls kgdb to set up +interrupts. At this time kgdb will test the UART and print out the type +it finds. (You need to wait so that the printks are actually being +printed. Early in the boot they are cached, waiting for the console to +be enabled. Also, if kgdb is entered thru a breakpoint it is possible +to cause a dead lock by calling printk when the console is locked. The +stub thus avoids doing printks from breakpoints, especially in the +serial code.) At this time, if the UART fails to do the expected thing, +kgdb will print out (using printk) information on what failed. (These +messages will be buried in all the other boot up messages. Look for +lines that start with "gdb_hook_interrupt:". You may want to use dmesg +once the system is up to view the log. If this fails or if you still +don't connect, review your answers for the port address. Use: + +setserial /dev/ttyS0 + +to get the current port and IRQ information. This command will also +tell you what the system found for the UART type. The stub recognizes +the following UART types: + +16450, 16550, and 16550A + +If you are really desperate you can use printk debugging in the +kgdbstub code in the target kernel until you get it working. In particular, +there is a global variable in /usr/src/linux/arch/i386/kernel/kgdb_stub.c +named "remote_debug". Compile your kernel with this set to 1, rather +than 0 and the debug stub will print out lots of stuff as it does +what it does. Likewise there are debug printks in the kgdb_serial.c +code that can be turned on with simple changes in the macro defines. + + +Debugging Loadable Modules +========================== + +This technique comes courtesy of Edouard Parmelan + + +When you run gdb, enter the command + +source gdbinit-modules + +This will read in a file of gdb macros that was installed in your +kernel source directory when kgdb was installed. This file implements +the following commands: + +mod-list + Lists the loaded modules in the form + +mod-print-symbols + Prints all the symbols in the indicated module. + +mod-add-symbols + Loads the symbols from the object file and associates them + with the indicated module. + +After you have loaded the module that you want to debug, use the command +mod-list to find the of your module. Then use that +address in the mod-add-symbols command to load your module's symbols. +From that point onward you can debug your module as if it were a part +of the kernel. + +The file gdbinit-modules also contains a command named mod-add-lis as +an example of how to construct a command of your own to load your +favorite module. The idea is to "can" the pathname of the module +in the command so you don't have to type so much. + +Threads +======= + +Each process in a target machine is seen as a gdb thread. gdb thread +related commands (info threads, thread n) can be used. + +ia-32 hardware breakpoints +========================== + +kgdb stub contains support for hardware breakpoints using debugging features +of ia-32(x86) processors. These breakpoints do not need code modification. +They use debugging registers. 4 hardware breakpoints are available in ia-32 +processors. + +Each hardware breakpoint can be of one of the following three types. + +1. Execution breakpoint - An Execution breakpoint is triggered when code + at the breakpoint address is executed. + + As limited number of hardware breakpoints are available, it is + advisable to use software breakpoints ( break command ) instead + of execution hardware breakpoints, unless modification of code + is to be avoided. + +2. Write breakpoint - A write breakpoint is triggered when memory + location at the breakpoint address is written. + + A write or can be placed for data of variable length. Length of + a write breakpoint indicates length of the datatype to be + watched. Length is 1 for 1 byte data , 2 for 2 byte data, 3 for + 4 byte data. + +3. Access breakpoint - An access breakpoint is triggered when memory + location at the breakpoint address is either read or written. + + Access breakpoints also have lengths similar to write breakpoints. + +IO breakpoints in ia-32 are not supported. + +Since gdb stub at present does not use the protocol used by gdb for hardware +breakpoints, hardware breakpoints are accessed through gdb macros. gdb macros +for hardware breakpoints are described below. + +hwebrk - Places an execution breakpoint + hwebrk breakpointno address +hwwbrk - Places a write breakpoint + hwwbrk breakpointno length address +hwabrk - Places an access breakpoint + hwabrk breakpointno length address +hwrmbrk - Removes a breakpoint + hwrmbrk breakpointno +exinfo - Tells whether a software or hardware breakpoint has occurred. + Prints number of the hardware breakpoint if a hardware breakpoint has + occurred. + +Arguments required by these commands are as follows +breakpointno - 0 to 3 +length - 1 to 3 +address - Memory location in hex digits ( without 0x ) e.g c015e9bc + +SMP support +========== + +When a breakpoint occurs or user issues a break ( Ctrl + C ) to gdb +client, all the processors are forced to enter the debugger. Current +thread corresponds to the thread running on the processor where +breakpoint occurred. Threads running on other processor(s) appear +similar to other non-running threads in the 'info threads' output. +Within the kgdb stub there is a structure "waiting_cpus" in which kgdb +records the values of "current" and "regs" for each CPU other than the +one that hit the breakpoint. "current" is a pointer to the task +structure for the task that CPU is running, while "regs" points to the +saved registers for the task. This structure can be examined with the +gdb "p" command. + +ia-32 hardware debugging registers on all processors are set to same +values. Hence any hardware breakpoints may occur on any processor. + +gdb troubleshooting +=================== + +1. gdb hangs +Kill it. restart gdb. Connect to target machine. + +2. gdb cannot connect to target machine (after killing a gdb and +restarting another) If the target machine was not inside debugger when +you killed gdb, gdb cannot connect because the target machine won't +respond. In this case echo "Ctrl+C"(ASCII 3) to the serial line. +e.g. echo -e "\003" > /dev/ttyS1 +This forces that target machine into the debugger, after which you +can connect. + +3. gdb cannot connect even after echoing Ctrl+C into serial line +Try changing serial line settings min to 1 and time to 0 +e.g. stty min 1 time 0 < /dev/ttyS1 +Try echoing again + +Check serial line speed and set it to correct value if required +e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 + +EVENTS +====== + +Ever want to know the order of things happening? Which CPU did what and +when? How did the spinlock get the way it is? Then events are for +you. Events are defined by calls to an event collection interface and +saved for later examination. In this case, kgdb events are saved by a +very fast bit of code in kgdb which is fully SMP and interrupt protected +and they are examined by using gdb to display them. Kgdb keeps only +the last N events, where N must be a power of two and is defined at +configure time. + + +Events are signaled to kgdb by calling: + +kgdb_ts(data0,data1) + +For each call kgdb records each call in an array along with other info. +Here is the array definition: + +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + long long at_time; + int from_ln; + char * in_src; + void *from; + int with_if; + int data0; + int data1; +}; + +For SMP machines the CPU is recorded, for all machines the TSC is +recorded (gets a time stamp) as well as the line number and source file +the call was made from. The address of the (from), the "if" (interrupt +flag) and the two data items are also recorded. The macro kgdb_ts casts +the types to int, so you can put any 32-bit values here. There is a +configure option to select the number of events you want to keep. A +nice number might be 128, but you can keep up to 1024 if you want. The +number must be a power of two. An "andthen" macro library is provided +for gdb to help you look at these events. It is also possible to define +a different structure for the event storage and cast the data to this +structure. For example the following structure is defined in kgdb: + +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + long long at_time; + int from_ln; + char * in_src; + void *from; + int with_if; + struct task_struct *t1; + struct task_struct *t2; +}; + +If you use this for display, the data elements will be displayed as +pointers to task_struct entries. You may want to define your own +structure to use in casting. You should only change the last two items +and you must keep the structure size the same. Kgdb will handle these +as 32-bit ints, but within that constraint you can define a structure to +cast to any 32-bit quantity. This need only be available to gdb and is +only used for casting in the display code. + +Final Items +=========== + +I picked up this code from Amit S. Kale and enhanced it. + +If you make some really cool modification to this stuff, or if you +fix a bug, please let me know. + +George Anzinger + + +Amit S. Kale + + +(First kgdb by David Grothe ) + +(modified by Tigran Aivazian ) + Putting gdbstub into the kernel config menu. + +(modified by Scott Foehner ) + Hooks for entering gdbstub at boot time. + +(modified by Amit S. Kale ) + Threads, ia-32 hw debugging, mp support, console support, + nmi watchdog handling. + +(modified by George Anzinger ) + Extended threads to include the idle threads. + Enhancements to allow breakpoint() at first C code. + Use of module_init() and __setup() to automate the configure. + Enhanced the cpu "collection" code to work in early bring-up. + Added ability to call functions from gdb + Print info thread stuff without going back to schedule() + Now collect the "other" cpus with an IPI/ NMI. --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/loadmodule.sh 2004-02-17 22:08:43.000000000 -0800 @@ -0,0 +1,78 @@ +#/bin/sh +# This script loads a module on a target machine and generates a gdb script. +# source generated gdb script to load the module file at appropriate addresses +# in gdb. +# +# Usage: +# Loading the module on target machine and generating gdb script) +# [foo]$ loadmodule.sh +# +# Loading the module file into gdb +# (gdb) source +# +# Modify following variables according to your setup. +# TESTMACHINE - Name of the target machine +# GDBSCRIPTS - The directory where a gdb script will be generated +# +# Author: Amit S. Kale (akale@veritas.com). +# +# If you run into problems, please check files pointed to by following +# variables. +# ERRFILE - /tmp/.errs contains stderr output of insmod +# MAPFILE - /tmp/.map contains stdout output of insmod +# GDBSCRIPT - $GDBSCRIPTS/load gdb script. + +TESTMACHINE=foo +GDBSCRIPTS=/home/bar + +if [ $# -lt 1 ] ; then { + echo Usage: $0 modulefile + exit +} ; fi + +MODULEFILE=$1 +MODULEFILEBASENAME=`basename $1` + +if [ $MODULEFILE = $MODULEFILEBASENAME ] ; then { + MODULEFILE=`pwd`/$MODULEFILE +} fi + +ERRFILE=/tmp/$MODULEFILEBASENAME.errs +MAPFILE=/tmp/$MODULEFILEBASENAME.map +GDBSCRIPT=$GDBSCRIPTS/load$MODULEFILEBASENAME + +function findaddr() { + local ADDR=0x$(echo "$SEGMENTS" | \ + grep "$1" | sed 's/^[^ ]*[ ]*[^ ]*[ ]*//' | \ + sed 's/[ ]*[^ ]*$//') + echo $ADDR +} + +function checkerrs() { + if [ "`cat $ERRFILE`" != "" ] ; then { + cat $ERRFILE + exit + } fi +} + +#load the module +echo Copying $MODULEFILE to $TESTMACHINE +rcp $MODULEFILE root@${TESTMACHINE}: + +echo Loading module $MODULEFILE +rsh -l root $TESTMACHINE /sbin/insmod -m ./`basename $MODULEFILE` \ + > $MAPFILE 2> $ERRFILE +checkerrs + +SEGMENTS=`head -n 11 $MAPFILE | tail -n 10` +TEXTADDR=$(findaddr "\\.text[^.]") +LOADSTRING="add-symbol-file $MODULEFILE $TEXTADDR" +SEGADDRS=`echo "$SEGMENTS" | awk '//{ + if ($1 != ".text" && $1 != ".this" && + $1 != ".kstrtab" && $1 != ".kmodtab") { + print " -s " $1 " 0x" $3 " " + } +}'` +LOADSTRING="$LOADSTRING $SEGADDRS" +echo Generating script $GDBSCRIPT +echo $LOADSTRING > $GDBSCRIPT --- linux-2.6.3/Documentation/ide.txt 2003-10-17 15:58:03.000000000 -0700 +++ 25/Documentation/ide.txt 2004-02-17 22:25:10.000000000 -0800 @@ -198,12 +198,11 @@ drivers can always be compiled as loadab can only be compiled into the kernel, and the core code (ide.c) can be compiled as a loadable module provided no chipset support is needed. -When using ide.c/ide-tape.c as modules in combination with kerneld, add: +When using ide.c as a module in combination with kmod, add: alias block-major-3 ide-probe - alias char-major-37 ide-tape -respectively to /etc/modules.conf. +to /etc/modprobe.conf. When ide.c is used as a module, you can pass command line parameters to the driver using the "options=" keyword to insmod, while replacing any ',' with --- linux-2.6.3/Documentation/kbuild/modules.txt 2004-01-09 00:04:30.000000000 -0800 +++ 25/Documentation/kbuild/modules.txt 2004-02-17 22:25:42.000000000 -0800 @@ -17,12 +17,52 @@ out-of-the-box support for installation Compiling modules outside the official kernel --------------------------------------------- -Often modules are developed outside the official kernel. -To keep up with changes in the build system the most portable way -to compile a module outside the kernel is to use the following command-line: + +Often modules are developed outside the official kernel. To keep up +with changes in the build system the most portable way to compile a +module outside the kernel is to use the kernel build system, +kbuild. Use the following command-line: make -C path/to/kernel/src SUBDIRS=$PWD modules This requires that a makefile exits made in accordance to -Documentation/kbuild/makefiles.txt. +Documentation/kbuild/makefiles.txt. Read that file for more details on +the build system. + +The following is a short summary of how to write your Makefile to get +you up and running fast. Assuming your module will be called +yourmodule.ko, your code should be in yourmodule.c and your Makefile +should include + +obj-m := yourmodule.o + +If the code for your module is in multiple files that need to be +linked, you need to tell the build system which files to compile. In +the case of multiple files, none of these files can be named +yourmodule.c because doing so would cause a problem with the linking +step. Assuming your code exists in file1.c, file2.c, and file3.c and +you want to build yourmodule.ko from them, your Makefile should +include + +obj-m := yourmodule.o +yourmodule-objs := file1.o file2.o file3.o + +Now for a final example to put it all together. Assuming the +KERNEL_SOURCE environment variable is set to the directory where you +compiled the kernel, a simple Makefile that builds yourmodule.ko as +described above would look like + +# Tells the build system to build yourmodule.ko. +obj-m := yourmodule.o + +# Tells the build system to build these object files and link them as +# yourmodule.o, before building yourmodule.ko. This line can be left +# out if all the code for your module is in one file, yourmodule.c. If +# you are using multiple files, none of these files can be named +# yourmodule.c. +yourmodule-objs := file1.o file2.o file3.o +# Invokes the kernel build system to come back to the current +# directory and build yourmodule.ko. +default: + make -C ${KERNEL_SOURCE} SUBDIRS=`pwd` modules --- linux-2.6.3/Documentation/kernel-parameters.txt 2004-02-03 20:42:34.000000000 -0800 +++ 25/Documentation/kernel-parameters.txt 2004-02-17 22:24:59.000000000 -0800 @@ -174,11 +174,18 @@ running once the system is up. atascsi= [HW,SCSI] Atari SCSI - atkbd.set= [HW] Select keyboard code set - Format: + atkbd.extra= [HW] Enable extra LEDs and keys on IBM RapidAccess, EzKey + and similar keyboards + + atkbd.reset= [HW] Reset keyboard during initialization + + atkbd.set= [HW] Select keyboard code set + Format: (2 = AT (default) 3 = PS/2) + + atkbd.scroll= [HW] Enable scroll wheel on MS Office and similar keyboards + atkbd.softrepeat= [HW] Use software keyboard repeat - atkbd.reset= [HW] Reset keyboard during initialization autotest [IA64] @@ -237,7 +244,7 @@ running once the system is up. Forces specified timesource (if avaliable) to be used when calculating gettimeofday(). If specicified timesource is not avalible, it defaults to PIT. - Format: { pit | tsc | cyclone | ... } + Format: { pit | tsc | cyclone | pmtmr } hpet= [IA-32,HPET] option to disable HPET and use PIT. Format: disable @@ -292,6 +299,9 @@ running once the system is up. devfs= [DEVFS] See Documentation/filesystems/devfs/boot-options. + + dhash_entries= [KNL] + Set number of hash buckets for dentry cache. digi= [HW,SERIAL] IO parameters + enable/disable command. @@ -310,6 +320,23 @@ running once the system is up. dtc3181e= [HW,SCSI] + earlyprintk= [x86, x86_64] + early_printk=vga + early_printk=serial[,ttySn[,baudrate]] + + Append ,keep to not disable it when the real console + takes over. + + Only vga or serial at a time, not both. + + Currently only ttyS0 and ttyS1 are supported. + + Interaction with the standard serial driver is not + very good. + + The VGA output is eventually overwritten by the real + console. + eata= [HW,SCSI] eda= [HW,PS2] @@ -424,6 +451,9 @@ running once the system is up. idle= [HW] Format: idle=poll or idle=halt + ihash_entries= [KNL] + Set number of hash buckets for inode cache. + in2000= [HW,SCSI] See header of drivers/scsi/in2000.c. @@ -873,6 +903,9 @@ running once the system is up. resume= [SWSUSP] Specify the partition device for software suspension + rhash_entries= [KNL,NET] + Set number of hash buckets for route cache + riscom8= [HW,SERIAL] Format: [,[,...]] @@ -1135,6 +1168,9 @@ running once the system is up. tgfx_2= See Documentation/input/joystick-parport.txt. tgfx_3= + thash_entries= [KNL,NET] + Set number of hash buckets for TCP connection + tipar= [HW] See header of drivers/char/tipar.c. --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/laptop-mode.txt 2004-02-17 22:09:07.000000000 -0800 @@ -0,0 +1,480 @@ +How to conserve battery power using laptop-mode +----------------------------------------------- + +Document Author: Bart Samwel (bart@samwel.tk) +Date created: January 2, 2004 + +Introduction +------------ + +Laptopmode is used to minimize the time that the hard disk needs to be spun up, +to conserve battery power on laptops. It has been reported to cause significant +power savings. + +Contents +-------- + +* Introduction +* The short story +* Caveats +* The details +* Tips & Tricks +* Control script +* ACPI integration +* Monitoring tool + + +The short story +--------------- + +If you just want to use it, run the laptop_mode control script (which is included +at the end of this document) as follows: + +# laptop_mode start + +Then set your harddisk spindown time to a relatively low value with hdparm: + +hdparm -S 4 /dev/hda + +The value -S 4 means 20 seconds idle time before spindown. Your harddisk will +now only spin up when a disk cache miss occurs, or at least once every 10 +minutes to write back any pending changes. + +To stop laptop_mode, remount your filesystems with regular commit intervals +(e.g., 5 seconds), and run "laptop_mode stop". + + +Caveats +------- + +* The downside of laptop mode is that you have a chance of losing up + to 10 minutes of work. If you cannot afford this, don't use it! + +* Most desktop hard drives have a very limited lifetime measured in spindown + cycles, typically about 50.000 times (it's usually listed on the spec sheet). + Check your drive's rating, and don't wear down your drive's lifetime if you + don't need to. + +* If you mount some of your ext3/reiserfs filesystems with the -n option, then + the control script will not be able to remount them correctly. You must set + DO_REMOUNTS=0 in the control script, otherwise it will remount them with the + wrong options -- or it will fail because it cannot write to /etc/mtab. + +* If you have your filesystems listed as type "auto" in fstab, like I did, then + the control script will not recognize them as filesystems that need remounting. + +The details +----------- + +Laptop-mode is controlled by the flag /proc/sys/vm/laptop_mode. When this +flag is set, any physical disk read operation (that might have caused the +hard disk to spin up) causes Linux to flush all dirty blocks. The result +of this is that after a disk has spun down, it will not be spun up anymore +to write dirty blocks, because those blocks had already been written +immediately after the most recent read operation + +To increase the effectiveness of the laptop_mode strategy, the laptop_mode +control script increases dirty_expire_centisecs and dirty_writeback_centisecs in +/proc/sys/vm to about 10 minutes (by default), which means that pages that are +dirtied are not forced to be written to disk as often. The control script also +changes the dirty background ratio, so that background writeback of dirty pages +is not done anymore. Combined with a higher commit value (also 10 minutes) for +ext3 or ReiserFS filesystems (also done automatically by the control script), +this results in concentration of disk activity in a small time interval which +occurs only once every 10 minutes, or whenever the disk is forced to spin up by +a cache miss. The disk can then be spun down in the periods of inactivity. + +If you want to find out which process caused the disk to spin up, you can +gather information by setting the flag /proc/sys/vm/block_dump. When this flag +is set, Linux reports all disk read and write operations that take place, and +all block dirtyings done to files. This makes it possible to debug why a disk +needs to spin up, and to increase battery life even more. + +If 10 minutes is too much or too little downtime for you, you can configure +this downtime as follows. In the control script, set the MAX_AGE value to the +maximum number of seconds of disk downtime that you would like. You should +then set your filesystem's commit interval to the same value. The dirty ratio +is also configurable from the control script. + +If you don't like the idea of the control script remounting your filesystems +for you, you can change DO_REMOUNTS to 0 in the script. + +Thanks to Kiko Piris, the control script can be used to enable laptop mode on +both the Linux 2.4 and 2.6 series. + + +Tips & Tricks +------------- + +* Bartek Kania reports getting up to 50 minutes of extra battery life (on top + of his regular 3 to 3.5 hours) using very aggressive power management (hdparm + -B1) and a spindown time of 5 seconds (hdparm -S1). + +* You can spin down the disk while playing MP3, by setting the disk readahead + to 8MB (hdparm -a 16384). Effectively, the disk will read a complete MP3 at + once, and will then spin down while the MP3 is playing. (Thanks to Bartek + Kania.) + +* Drew Scott Daniels observed: "I don't know why, but when I decrease the number + of colours that my display uses it consumes less battery power. I've seen + this on powerbooks too. I hope that this is a piece of information that + might be useful to the Laptop Mode patch or it's users." + + +Control script +-------------- + +Please note that this control script works for the Linux 2.4 and 2.6 series. + +--------------------CONTROL SCRIPT BEGIN------------------------------------------ +#!/bin/sh + +# start or stop laptop_mode, best run by a power management daemon when +# ac gets connected/disconnected from a laptop +# +# install as /sbin/laptop_mode +# +# Contributors to this script: Kiko Piris +# Bart Samwel +# Dax Kelson +# Original Linux 2.4 version by: Jens Axboe + +parse_mount_opts () { + echo "$*" | \ + sed 's/commit=[0-9]*//g' | \ + sed 's/,,*/,/g' | \ + sed 's/^,//' | \ + sed 's/,$//' | \ + cat - +} + +KLEVEL="$(uname -r | cut -c1-3)" +case "$KLEVEL" in + "2.4") + true + ;; + "2.6") + true + ;; + *) + echo "Unhandled kernel level: $KLEVEL ('uname -r' = '$(uname -r)')" + exit 1 + ;; +esac + +# Shall we remount journaled fs. with appropiate commit interval? (1=yes) +DO_REMOUNTS=1 + +# age time, in seconds. should be put into a sysconfig file +MAX_AGE=600 + +# Allowed dirty ratio, in pct. should be put into a sysconfig file as well. +DIRTY_RATIO=40 + +# kernel default dirty buffer age +DEF_AGE=30 +DEF_UPDATE=5 +DEF_DIRTY_BACKGROUND_RATIO=10 +DEF_DIRTY_RATIO=40 + + +if [ ! -e /proc/sys/vm/laptop_mode ]; then + echo "Kernel is not patched with laptop_mode patch." + exit 1 +fi + +if [ ! -w /proc/sys/vm/laptop_mode ]; then + echo "You do not have enough privileges to enable laptop_mode." + exit 1 +fi + +case "$1" in + start) + AGE=$((100*$MAX_AGE)) + echo -n "Starting laptop_mode" + case "$KLEVEL" in + "2.4") + echo "1" > /proc/sys/vm/laptop_mode + echo "30 500 0 0 $AGE $AGE 60 20 0" > /proc/sys/vm/bdflush + ;; + "2.6") + echo "1" > /proc/sys/vm/laptop_mode + echo "$AGE" > /proc/sys/vm/dirty_writeback_centisecs + echo "$AGE" > /proc/sys/vm/dirty_expire_centisecs + echo "$DIRTY_RATIO" > /proc/sys/vm/dirty_ratio + echo "$DIRTY_RATIO" > /proc/sys/vm/dirty_background_ratio + ;; + esac + if [ $DO_REMOUNTS -eq 1 ]; then + cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do + PARSEDOPTS="$(parse_mount_opts "$OPTS")" + case "$FST" in + "ext3") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE ;; + "reiserfs") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE ;; + "xfs") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE ;; + esac + done + fi + echo "." + ;; + stop) + U_AGE=$((100*$DEF_UPDATE)) + B_AGE=$((100*$DEF_AGE)) + echo -n "Stopping laptop_mode" + case "$KLEVEL" in + "2.4") + echo "0" > /proc/sys/vm/laptop_mode + echo "30 500 0 0 $U_AGE $B_AGE 60 20 0" > /proc/sys/vm/bdflush + ;; + "2.6") + echo "0" > /proc/sys/vm/laptop_mode + echo "$U_AGE" > /proc/sys/vm/dirty_writeback_centisecs + echo "$B_AGE" > /proc/sys/vm/dirty_expire_centisecs + echo "$DEF_DIRTY_RATIO" > /proc/sys/vm/dirty_ratio + echo "$DEF_DIRTY_BACKGROUND_RATIO" > /proc/sys/vm/dirty_background_ratio + ;; + esac + if [ $DO_REMOUNTS -eq 1 ]; then + cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do + PARSEDOPTS="$(parse_mount_opts "$OPTS")" + case "$FST" in + "ext3") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS ;; + "reiserfs") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS ;; + "xfs") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS ;; + esac + done + fi + echo "." + ;; + *) + echo "$0 {start|stop}" + ;; + +esac + +exit 0 + +--------------------CONTROL SCRIPT END-------------------------------------------- + + +ACPI integration +---------------- + +Dax Kelson submitted this so that the ACPI acpid daemon will +kick off the laptop_mode script and run hdparm. + +---------------------------/etc/acpi/events/ac_adapter BEGIN------------------------------------------- +event=ac_adapter +action=/etc/acpi/actions/battery.sh +---------------------------/etc/acpi/events/ac_adapter END------------------------------------------- + +---------------------------/etc/acpi/actions/battery.sh BEGIN------------------------------------------- +#!/bin/sh + +# cpu throttling +# cat /proc/acpi/processor/CPU0/throttling for more info +ACAD_THR=0 +BATT_THR=2 + +# spindown time for HD (man hdparm for valid values) +# I prefer 2 hours for acad and 20 seconds for batt +ACAD_HD=244 +BATT_HD=4 + +# ac/battery event handler + +status=`awk '/^state: / { print $2 }' /proc/acpi/ac_adapter/AC/state` + +case $status in + "on-line") + echo "Setting HD spindown to 2 hours" + /sbin/laptop-mode stop + /sbin/hdparm -S $ACAD_HD /dev/hda > /dev/null 2>&1 + /sbin/hdparm -B 255 /dev/hda > /dev/null 2>&1 + #echo -n $ACAD_CPU:$ACAD_THR > /proc/acpi/processor/CPU0/limit + exit 0 + ;; + "off-line") + echo "Setting HD spindown to 20 seconds" + /sbin/laptop-mode start + /sbin/hdparm -S $BATT_HD /dev/hda > /dev/null 2>&1 + /sbin/hdparm -B 1 /dev/hda > /dev/null 2>&1 + #echo -n $BATT_CPU:$BATT_THR > /proc/acpi/processor/CPU0/limit + exit 0 + ;; +esac +---------------------------/etc/acpi/actions/battery.sh END------------------------------------------- + +Monitoring tool +--------------- + +Bartek Kania submitted this, it can be used to measure how much time your disk +spends spun up/down. + +---------------------------dslm.c BEGIN------------------------------------------- +/* + * Simple Disk SLeep Monitor + * by Bartek Kania + * Licenced under the GPL + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define D(x) x +#else +#define D(x) +#endif + +int endit = 0; + +/* Check if the disk is in powersave-mode + * Most of the code is stolen from hdparm. + * 1 = active, 0 = standby/sleep, -1 = unknown */ +int check_powermode(int fd) +{ + unsigned char args[4] = {WIN_CHECKPOWERMODE1,0,0,0}; + int state; + + if (ioctl(fd, HDIO_DRIVE_CMD, &args) + && (args[0] = WIN_CHECKPOWERMODE2) /* try again with 0x98 */ + && ioctl(fd, HDIO_DRIVE_CMD, &args)) { + if (errno != EIO || args[0] != 0 || args[1] != 0) { + state = -1; /* "unknown"; */ + } else + state = 0; /* "sleeping"; */ + } else { + state = (args[2] == 255) ? 1 : 0; + } + D(printf(" drive state is: %s\n", state)); + + return state; +} + +char *state_name(int i) +{ + if (i == -1) return "unknown"; + if (i == 0) return "sleeping"; + if (i == 1) return "active"; + + return "internal error"; +} + +char *myctime(time_t time) +{ + char *ts = ctime(&time); + ts[strlen(ts) - 1] = 0; + + return ts; +} + +void measure(int fd) +{ + time_t start_time; + int last_state; + time_t last_time; + int curr_state; + time_t curr_time = 0; + time_t time_diff; + time_t active_time = 0; + time_t sleep_time = 0; + time_t unknown_time = 0; + time_t total_time = 0; + int changes = 0; + float tmp; + + printf("Starting measurements\n"); + + last_state = check_powermode(fd); + start_time = last_time = time(0); + printf(" System is in state %s\n\n", state_name(last_state)); + + while(!endit) { + sleep(1); + curr_state = check_powermode(fd); + + if (curr_state != last_state || endit) { + changes++; + curr_time = time(0); + time_diff = curr_time - last_time; + + if (last_state == 1) active_time += time_diff; + else if (last_state == 0) sleep_time += time_diff; + else unknown_time += time_diff; + + last_state = curr_state; + last_time = curr_time; + + printf("%s: State-change to %s\n", myctime(curr_time), + state_name(curr_state)); + } + } + changes--; /* Compensate for SIGINT */ + + total_time = time(0) - start_time; + printf("\nTotal running time: %lus\n", curr_time - start_time); + printf(" State changed %d times\n", changes); + + tmp = (float)sleep_time / (float)total_time * 100; + printf(" Time in sleep state: %lus (%.2f%%)\n", sleep_time, tmp); + tmp = (float)active_time / (float)total_time * 100; + printf(" Time in active state: %lus (%.2f%%)\n", active_time, tmp); + tmp = (float)unknown_time / (float)total_time * 100; + printf(" Time in unknown state: %lus (%.2f%%)\n", unknown_time, tmp); +} + +void ender(int s) +{ + endit = 1; +} + +void usage() +{ + puts("usage: dslm [-w