--- linux-2.6.3-rc2/arch/alpha/kernel/core_apecs.c 2003-06-14 12:18:05.000000000 -0700 +++ 25/arch/alpha/kernel/core_apecs.c 2004-02-12 00:39:53.000000000 -0800 @@ -10,20 +10,18 @@ * Code common to all APECS core logic chips. */ -#include +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + #include #include #include -#include #include #include -#define __EXTERN_INLINE inline -#include -#include -#undef __EXTERN_INLINE - #include "proto.h" #include "pci_impl.h" --- linux-2.6.3-rc2/arch/alpha/kernel/core_cia.c 2003-08-22 19:23:39.000000000 -0700 +++ 25/arch/alpha/kernel/core_cia.c 2004-02-12 00:39:53.000000000 -0800 @@ -11,23 +11,19 @@ * Code common to all CIA core logic chips. */ -#include -#include -#include -#include -#include - -#include -#include -#include - #define __EXTERN_INLINE inline #include #include #undef __EXTERN_INLINE +#include +#include +#include +#include #include +#include + #include "proto.h" #include "pci_impl.h" --- linux-2.6.3-rc2/arch/alpha/kernel/core_irongate.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/alpha/kernel/core_irongate.c 2004-02-12 00:39:53.000000000 -0800 @@ -9,27 +9,23 @@ * Code common to all IRONGATE core logic chips. */ -#include +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + #include #include #include #include #include +#include #include -#include #include -#include #include #include -#define __EXTERN_INLINE inline -#include -#include -#undef __EXTERN_INLINE - -#include - #include "proto.h" #include "pci_impl.h" --- linux-2.6.3-rc2/arch/alpha/kernel/core_lca.c 2003-08-22 19:23:39.000000000 -0700 +++ 25/arch/alpha/kernel/core_lca.c 2004-02-12 00:39:53.000000000 -0800 @@ -8,21 +8,19 @@ * Code common to all LCA core logic chips. */ -#include +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + #include #include #include #include #include -#include #include -#define __EXTERN_INLINE inline -#include -#include -#undef __EXTERN_INLINE - #include "proto.h" #include "pci_impl.h" --- linux-2.6.3-rc2/arch/alpha/kernel/core_marvel.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/alpha/kernel/core_marvel.c 2004-02-12 00:39:53.000000000 -0800 @@ -4,14 +4,11 @@ * Code common to all Marvel based systems. */ -#include - #define __EXTERN_INLINE inline #include #include #undef __EXTERN_INLINE -#include #include #include #include @@ -20,18 +17,15 @@ #include #include #include +#include #include -#include #include -#include #include #include #include #include -#include - #include "proto.h" #include "pci_impl.h" --- linux-2.6.3-rc2/arch/alpha/kernel/core_mcpcia.c 2003-08-22 19:23:39.000000000 -0700 +++ 25/arch/alpha/kernel/core_mcpcia.c 2004-02-12 00:39:53.000000000 -0800 @@ -6,7 +6,11 @@ * Code common to all MCbus-PCI Adaptor core logic chipsets */ -#include +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + #include #include #include @@ -14,13 +18,6 @@ #include #include -#include -#include - -#define __EXTERN_INLINE inline -#include -#include -#undef __EXTERN_INLINE #include "proto.h" #include "pci_impl.h" --- linux-2.6.3-rc2/arch/alpha/kernel/core_polaris.c 2003-06-14 12:18:00.000000000 -0700 +++ 25/arch/alpha/kernel/core_polaris.c 2004-02-12 00:39:53.000000000 -0800 @@ -4,20 +4,18 @@ * POLARIS chip-specific code */ -#include +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + #include #include #include #include -#include #include -#define __EXTERN_INLINE inline -#include -#include -#undef __EXTERN_INLINE - #include "proto.h" #include "pci_impl.h" --- linux-2.6.3-rc2/arch/alpha/kernel/core_t2.c 2003-08-22 19:23:39.000000000 -0700 +++ 25/arch/alpha/kernel/core_t2.c 2004-02-12 00:39:53.000000000 -0800 @@ -9,21 +9,19 @@ * Code common to all T2 core logic chips. */ -#include +#define __EXTERN_INLINE +#include +#include +#undef __EXTERN_INLINE + #include #include #include #include #include -#include #include -#define __EXTERN_INLINE -#include -#include -#undef __EXTERN_INLINE - #include "proto.h" #include "pci_impl.h" --- linux-2.6.3-rc2/arch/alpha/kernel/core_titan.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/alpha/kernel/core_titan.c 2004-02-12 00:39:53.000000000 -0800 @@ -4,29 +4,24 @@ * Code common to all TITAN core logic chips. */ -#include +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + #include -#include #include #include #include #include #include +#include -#include #include -#include #include #include #include -#define __EXTERN_INLINE inline -#include -#include -#undef __EXTERN_INLINE - -#include - #include "proto.h" #include "pci_impl.h" --- linux-2.6.3-rc2/arch/alpha/kernel/core_tsunami.c 2003-08-22 19:23:39.000000000 -0700 +++ 25/arch/alpha/kernel/core_tsunami.c 2004-02-12 00:39:53.000000000 -0800 @@ -6,23 +6,20 @@ * Code common to all TSUNAMI core logic chips. */ -#include +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + #include #include #include #include +#include #include -#include #include -#define __EXTERN_INLINE inline -#include -#include -#undef __EXTERN_INLINE - -#include - #include "proto.h" #include "pci_impl.h" --- linux-2.6.3-rc2/arch/alpha/kernel/core_wildfire.c 2003-08-22 19:23:39.000000000 -0700 +++ 25/arch/alpha/kernel/core_wildfire.c 2004-02-12 00:39:53.000000000 -0800 @@ -6,21 +6,19 @@ * Copyright (C) 2000 Andrea Arcangeli SuSE */ -#include +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + #include #include #include #include #include -#include #include -#define __EXTERN_INLINE inline -#include -#include -#undef __EXTERN_INLINE - #include "proto.h" #include "pci_impl.h" --- linux-2.6.3-rc2/arch/alpha/kernel/irq.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/alpha/kernel/irq.c 2004-02-12 00:41:09.000000000 -0800 @@ -252,7 +252,7 @@ static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -333,7 +333,7 @@ static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3-rc2/arch/alpha/kernel/osf_sys.c 2003-08-08 22:55:10.000000000 -0700 +++ 25/arch/alpha/kernel/osf_sys.c 2004-02-12 00:40:57.000000000 -0800 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,6 @@ #include extern int do_pipe(int *); -extern asmlinkage unsigned long sys_brk(unsigned long); /* * Brk needs to return an error. Still support Linux's brk(0) query idiom, @@ -821,7 +821,6 @@ osf_setsysinfo(unsigned long op, void *b affects all sorts of things, like timeval and itimerval. */ extern struct timezone sys_tz; -extern asmlinkage int sys_utimes(char *, struct timeval *); extern int do_adjtimex(struct timex *); struct timeval32 @@ -1315,8 +1314,6 @@ arch_get_unmapped_area(struct file *filp } #ifdef CONFIG_OSF4_COMPAT -extern ssize_t sys_readv(unsigned long, const struct iovec *, unsigned long); -extern ssize_t sys_writev(unsigned long, const struct iovec *, unsigned long); /* Clear top 32 bits of iov_len in the user's buffer for compatibility with old versions of OSF/1 where iov_len --- linux-2.6.3-rc2/arch/arm26/Kconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm26/Kconfig 2004-02-12 00:39:33.000000000 -0800 @@ -216,11 +216,6 @@ source "drivers/input/Kconfig" source "drivers/char/Kconfig" -config KBDMOUSE - bool - depends on ARCH_ACORN && BUSMOUSE=y - default y - source "drivers/media/Kconfig" source "fs/Kconfig" --- linux-2.6.3-rc2/arch/arm26/kernel/armksyms.c 2003-06-14 12:18:07.000000000 -0700 +++ 25/arch/arm26/kernel/armksyms.c 2004-02-12 00:40:54.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -43,14 +44,6 @@ extern void outswb(unsigned int port, co extern void __bad_xchg(volatile void *ptr, int size); /* - * syscalls - */ -extern int sys_write(int, const char *, int); -extern int sys_read(int, char *, int); -extern int sys_lseek(int, off_t, int); -extern int sys_exit(int); - -/* * libgcc functions - functions that are used internally by the * compiler... (prototypes are not correct though, but that * doesn't really matter since they're not versioned). --- linux-2.6.3-rc2/arch/arm26/kernel/sys_arm.c 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/arm26/kernel/sys_arm.c 2004-02-12 00:40:54.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -138,7 +139,6 @@ out: * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. */ -extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); struct sel_arg_struct { unsigned long n; --- linux-2.6.3-rc2/arch/arm/Kconfig 2004-02-09 21:35:50.000000000 -0800 +++ 25/arch/arm/Kconfig 2004-02-12 00:41:18.000000000 -0800 @@ -639,6 +639,8 @@ source "drivers/media/Kconfig" source "fs/Kconfig" +source "arch/arm/oprofile/Kconfig" + source "drivers/video/Kconfig" if ARCH_ACORN || ARCH_CLPS7500 || ARCH_TBOX || ARCH_SHARK || ARCH_SA1100 || PCI --- linux-2.6.3-rc2/arch/arm/kernel/armksyms.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/arm/kernel/armksyms.c 2004-02-12 00:40:54.000000000 -0800 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -44,14 +45,6 @@ extern void outswb(unsigned int port, co extern void __bad_xchg(volatile void *ptr, int size); /* - * syscalls - */ -extern int sys_write(int, const char *, int); -extern int sys_read(int, char *, int); -extern int sys_lseek(int, off_t, int); -extern int sys_exit(int); - -/* * libgcc functions - functions that are used internally by the * compiler... (prototypes are not correct though, but that * doesn't really matter since they're not versioned). --- linux-2.6.3-rc2/arch/arm/kernel/sys_arm.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/arm/kernel/sys_arm.c 2004-02-12 00:40:54.000000000 -0800 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -137,7 +138,6 @@ out: * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. */ -extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); struct sel_arg_struct { unsigned long n; --- linux-2.6.3-rc2/arch/arm/kernel/time.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/arm/kernel/time.c 2004-02-12 00:41:18.000000000 -0800 @@ -83,6 +83,9 @@ unsigned long long sched_clock(void) */ static inline void do_profile(struct pt_regs *regs) { + + profile_hook(regs); + if (!user_mode(regs) && prof_buffer && current->pid) { --- linux-2.6.3-rc2/arch/arm/Makefile 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/arm/Makefile 2004-02-12 00:41:18.000000000 -0800 @@ -116,6 +116,7 @@ endif core-$(CONFIG_FPE_NWFPE) += arch/arm/nwfpe/ core-$(CONFIG_FPE_FASTFPE) += $(FASTFPE_OBJ) +drivers-$(CONFIG_OPROFILE) += arch/arm/oprofile/ drivers-$(CONFIG_ARCH_CLPS7500) += drivers/acorn/char/ drivers-$(CONFIG_ARCH_L7200) += drivers/acorn/char/ --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/arm/oprofile/init.c 2004-02-12 00:41:18.000000000 -0800 @@ -0,0 +1,22 @@ +/** + * @file init.c + * + * @remark Copyright 2004 Oprofile Authors + * + * @author Zwane Mwaikambo + */ + +#include +#include +#include + +int oprofile_arch_init(struct oprofile_operations **ops) +{ + int ret = -ENODEV; + + return ret; +} + +void oprofile_arch_exit(void) +{ +} --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/arm/oprofile/Kconfig 2004-02-12 00:41:18.000000000 -0800 @@ -0,0 +1,23 @@ + +menu "Profiling support" + depends on EXPERIMENTAL + +config PROFILING + bool "Profiling support (EXPERIMENTAL)" + help + Say Y here to enable the extended profiling support mechanisms used + by profilers such as OProfile. + + +config OPROFILE + tristate "OProfile system profiling (EXPERIMENTAL)" + depends on PROFILING + help + OProfile is a profiling system capable of profiling the + whole system, include the kernel, kernel modules, libraries, + and applications. + + If unsure, say N. + +endmenu + --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/arm/oprofile/Makefile 2004-02-12 00:41:18.000000000 -0800 @@ -0,0 +1,9 @@ +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) init.o --- linux-2.6.3-rc2/arch/cris/arch-v10/drivers/ethernet.c 2003-07-10 18:50:30.000000000 -0700 +++ 25/arch/cris/arch-v10/drivers/ethernet.c 2004-02-12 00:39:30.000000000 -0800 @@ -482,7 +482,7 @@ etrax_ethernet_init(void) /* Register device */ err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.3-rc2/arch/h8300/kernel/signal.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/h8300/kernel/signal.c 2004-02-12 00:40:57.000000000 -0800 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -46,8 +47,6 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -asmlinkage long sys_wait4(pid_t pid, unsigned int * stat_addr, int options, - struct rusage * ru); asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs); /* --- linux-2.6.3-rc2/arch/h8300/kernel/sys_h8300.c 2003-08-08 22:55:10.000000000 -0700 +++ 25/arch/h8300/kernel/sys_h8300.c 2004-02-12 00:40:54.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -155,8 +156,6 @@ out: } #endif -extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); - struct sel_arg_struct { unsigned long n; fd_set *inp, *outp, *exp; --- linux-2.6.3-rc2/arch/i386/boot/Makefile 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/boot/Makefile 2004-02-12 00:41:14.000000000 -0800 @@ -31,6 +31,8 @@ subdir- := compressed host-progs := tools/build +HOSTCFLAGS_build.o := -I$(TOPDIR)/include + # --------------------------------------------------------------------------- $(obj)/zImage: IMAGE_OFFSET := 0x1000 --- linux-2.6.3-rc2/arch/i386/boot/setup.S 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/i386/boot/setup.S 2004-02-12 00:41:23.000000000 -0800 @@ -164,7 +164,7 @@ cmd_line_ptr: .long 0 # (Header versio # can be located anywhere in # low memory 0x10000 or higher. -ramdisk_max: .long MAXMEM-1 # (Header version 0x0203 or later) +ramdisk_max: .long __MAXMEM-1 # (Header version 0x0203 or later) # The highest safe address for # the contents of an initrd @@ -776,7 +776,7 @@ end_move_self: # now we are at the r # AMD Elan bug fix by Robert Schwebel. # -#if defined(CONFIG_MELAN) +#if defined(CONFIG_X86_ELAN) movb $0x02, %al # alternate A20 gate outb %al, $0x92 # this works on SC410/SC520 a20_elan_wait: --- linux-2.6.3-rc2/arch/i386/Kconfig 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/Kconfig 2004-02-12 00:41:23.000000000 -0800 @@ -43,6 +43,15 @@ config X86_PC help Choose this option if your computer is a standard PC or compatible. +config X86_ELAN + bool "AMD Elan" + help + Select this for an AMD Elan processor. + + Do not use this option for K6/Athlon/Opteron processors! + + If unsure, choose "PC-compatible" instead. + config X86_VOYAGER bool "Voyager (NCR)" help @@ -130,278 +139,318 @@ config ES7000_CLUSTERED_APIC default y depends on SMP && X86_ES7000 && MPENTIUMIII -choice - prompt "Processor family" - default M686 +if !X86_ELAN -config M386 - bool "386" - ---help--- - This is the processor type of your CPU. This information is used for - optimizing purposes. In order to compile a kernel that can run on - all x86 CPU types (albeit not optimally fast), you can specify - "386" here. - - The kernel will not necessarily run on earlier architectures than - the one you have chosen, e.g. a Pentium optimized kernel will run on - a PPro, but not necessarily on a i486. - - Here are the settings recommended for greatest speed: - - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI - 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels - will run on a 386 class machine. - - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or - SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. - - "586" for generic Pentium CPUs lacking the TSC - (time stamp counter) register. - - "Pentium-Classic" for the Intel Pentium. - - "Pentium-MMX" for the Intel Pentium MMX. - - "Pentium-Pro" for the Intel Pentium Pro. - - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron. - - "Pentium-III" for the Intel Pentium III or Coppermine Celeron. - - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron. - - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D). - - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird). - - "Crusoe" for the Transmeta Crusoe series. - - "Winchip-C6" for original IDT Winchip. - - "Winchip-2" for IDT Winchip 2. - - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. - - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. - - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). +menu "Processor support" + +comment "Select all processors your kernel should support" - If you don't know what to do, choose "386". +config CPU_386 + bool "386" + default n + help + Select this for a 386 series processor. -config M486 +config CPU_486 bool "486" + default y help Select this for a 486 series processor, either Intel or one of the compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX, DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. -config M586 +config CPU_586 bool "586/K5/5x86/6x86/6x86MX" + default y help - Select this for an 586 or 686 series processor such as the AMD K5, - the Intel 5x86 or 6x86, or the Intel 6x86MX. This choice does not - assume the RDTSC (Read Time Stamp Counter) instruction. + Select this for a non-Intel 586 or 686 series processor such as + the AMD K5 or the Cyrix 6x86MX. + + Several CPUs that have their own options below (e.g. AMD K6, + Duron, Athlon and Opteeron, IDT Winchip, Cyrix III and + VIA C3) do _not_ need this option. -config M586TSC + This choice does not assume the RDTSC (Read Time Stamp Counter) + instruction. + +config CPU_586TSC bool "Pentium-Classic" + default y help Select this for a Pentium Classic processor with the RDTSC (Read - Time Stamp Counter) instruction for benchmarking. + Time Stamp Counter) instruction. -config M586MMX +config CPU_586MMX bool "Pentium-MMX" + default y help Select this for a Pentium with the MMX graphics/multimedia extended instructions. -config M686 +config CPU_686 bool "Pentium-Pro" + default y help - Select this for Intel Pentium Pro chips. This enables the use of - Pentium Pro extended instructions, and disables the init-time guard - against the f00f bug found in earlier Pentiums. + Select this for Intel Pentium Pro chips. -config MPENTIUMII +config CPU_PENTIUMII bool "Pentium-II/Celeron(pre-Coppermine)" + default y help Select this for Intel chips based on the Pentium-II and - pre-Coppermine Celeron core. This option enables an unaligned - copy optimization, compiles the kernel with optimization flags - tailored for the chip, and applies any applicable Pentium Pro - optimizations. + pre-Coppermine Celeron core. -config MPENTIUMIII +config CPU_PENTIUMIII bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" + default y help Select this for Intel chips based on the Pentium-III and - Celeron-Coppermine core. This option enables use of some - extended prefetch instructions in addition to the Pentium II - extensions. - -config MPENTIUM4 - bool "Pentium-4/Celeron(P4-based)/Xeon" - help - Select this for Intel Pentium 4 chips. This includes both - the Pentium 4 and P4-based Celeron chips. This option - enables compile flags optimized for the chip, uses the - correct cache shift, and applies any applicable Pentium III - optimizations. + Celeron-Coppermine core. + +config CPU_PENTIUMM + bool "Pentium M" + default y + help + Select this for Intel Pentium M (not Pentium-4 M) + notebook chips. + +config CPU_PENTIUM4 + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon" + default y + help + Select this for Intel Pentium 4 chips. This includes + the Pentium 4, P4-based Celeron and Xeon, and + Pentium-4 M (not Pentium M) chips. -config MK6 +config CPU_K6 bool "K6/K6-II/K6-III" + default y help - Select this for an AMD K6-family processor. Enables use of - some extended instructions, and passes appropriate optimization - flags to GCC. + Select this for an AMD K6, K6-II or K6-III (aka K6-3D). -config MK7 +config CPU_K7 bool "Athlon/Duron/K7" + default y help - Select this for an AMD Athlon K7-family processor. Enables use of - some extended instructions, and passes appropriate optimization - flags to GCC. + Select this for an AMD Athlon K7-family processor. -config MK8 +config CPU_K8 bool "Opteron/Athlon64/Hammer/K8" + default y help - Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables - use of some extended instructions, and passes appropriate optimization - flags to GCC. - -config MELAN - bool "Elan" + Select this for an AMD Opteron or Athlon64 Hammer-family processor. -config MCRUSOE +config CPU_CRUSOE bool "Crusoe" + default y help - Select this for a Transmeta Crusoe processor. Treats the processor - like a 586 with TSC, and sets some GCC optimization flags (like a - Pentium Pro with no alignment requirements). + Select this for a Transmeta Crusoe processor. -config MWINCHIPC6 +config CPU_WINCHIPC6 bool "Winchip-C6" + default y help - Select this for an IDT Winchip C6 chip. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment requirements. + Select this for an IDT Winchip C6 chip. -config MWINCHIP2 +config CPU_WINCHIP2 bool "Winchip-2" + default y help - Select this for an IDT Winchip-2. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment requirements. + Select this for an IDT Winchip-2. -config MWINCHIP3D +config CPU_WINCHIP3D bool "Winchip-2A/Winchip-3" + default y + help + Select this for an IDT Winchip-2A or 3 with 3dNow! + capabilities. + +config CPU_CYRIXIII + bool "Cyrix III/VIA C3" + default y help - Select this for an IDT Winchip-2A or 3. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment reqirements. Also enable out of order memory - stores for this CPU, which can increase performance of some - operations. - -config MCYRIXIII - bool "CyrixIII/VIA-C3" - help - Select this for a Cyrix III or C3 chip. Presently Linux and GCC - treat this chip as a generic 586. Whilst the CPU is 686 class, - it lacks the cmov extension which gcc assumes is present when - generating 686 code. - Note that Nehemiah (Model 9) and above will not boot with this - kernel due to them lacking the 3DNow! instructions used in earlier - incarnations of the CPU. + Select this for a Cyrix III or VIA C3 chip. + + Note that Nehemiah (Model 9) and above need the next + option instead. -config MVIAC3_2 +config CPU_VIAC3_2 bool "VIA C3-2 (Nehemiah)" + default y help - Select this for a VIA C3 "Nehemiah". Selecting this enables usage - of SSE and tells gcc to treat the CPU as a 686. - Note, this kernel will not boot on older (pre model 9) C3s. + Select this for a VIA C3 "Nehemiah" (model 9 and above). -endchoice +endmenu -config X86_GENERIC - bool "Generic x86 support" - help - Including some tuning for non selected x86 CPUs too. - when it has moderate overhead. This is intended for generic - distributions kernels. +endif + +# +# helper options +# +config CPU_INTEL + bool + depends on CPU_386 || CPU_486 || CPU_586TSC || CPU_686 || CPU_PENTIUMII || CPU_PENTIUMIII || CPU_PENTIUMM || CPU_PENTIUM4 + default y + +config CPU_WINCHIP + bool + depends on CPU_WINCHIPC6 || CPU_WINCHIP2 || CPU_WINCHIP3D + default y + +config CPU_ONLY_K7 + bool + depends on CPU_K7 && !CPU_INTEL && !CPU_K6 && !CPU_K8 && !X86_ELAN && !CPU_CRUSOE && !CPU_WINCHIP && !CPU_CYRIXIII && !CPU_VIAC3_2 + default y + +config CPU_ONLY_K8 + bool + depends on CPU_K8 && !CPU_INTEL && !CPU_K6 && !CPU_K7 && !X86_ELAN && !CPU_CRUSOE && !CPU_WINCHIP && !CPU_CYRIXIII && !CPU_VIAC3_2 + default y + +config CPU_ONLY_WINCHIP + bool + depends on CPU_WINCHIP && !CPU_INTEL && !CPU_K6 && !CPU_K7 && !CPU_K8 && !X86_ELAN && !CPU_CRUSOE && !CPU_CYRIXIII && !CPU_VIAC3_2 + default y # # Define implied options from the CPU selection here # config X86_CMPXCHG bool - depends on !M386 + depends on !CPU_386 default y config X86_XADD bool - depends on !M386 + depends on !CPU_386 default y config X86_L1_CACHE_SHIFT int - default "7" if MPENTIUM4 || X86_GENERIC - default "4" if MELAN || M486 || M386 - default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 - default "6" if MK7 || MK8 + default "7" if CPU_PENTIUM4 + default "6" if CPU_K7 || CPU_K8 || CPU_PENTIUMM + default "5" if CPU_WINCHIP || CPU_CRUSOE || CPU_CYRIXIII || CPU_K6 || CPU_PENTIUMIII || CPU_PENTIUMII || CPU_686 || CPU_586MMX || CPU_586TSC || CPU_586 || CPU_VIAC3_2 + default "4" if X86_ELAN || CPU_486 || CPU_386 config RWSEM_GENERIC_SPINLOCK bool - depends on M386 + depends on CPU_386 default y config RWSEM_XCHGADD_ALGORITHM bool - depends on !M386 + depends on !CPU_386 default y config X86_PPRO_FENCE bool - depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 + depends on CPU_686 default y config X86_F00F_BUG bool - depends on M586MMX || M586TSC || M586 || M486 || M386 + depends on CPU_586MMX || CPU_586TSC default y config X86_WP_WORKS_OK bool - depends on !M386 + depends on !CPU_386 default y config X86_INVLPG bool - depends on !M386 + depends on !CPU_386 default y config X86_BSWAP bool - depends on !M386 + depends on !CPU_386 default y config X86_POPAD_OK bool - depends on !M386 + depends on !CPU_386 default y config X86_ALIGNMENT_16 bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 + depends on CPU_WINCHIP || CPU_CYRIXIII || X86_ELAN || CPU_K6 || CPU_586MMX || CPU_586TSC || CPU_586 || CPU_486 || CPU_VIAC3_2 default y -config X86_GOOD_APIC +config X86_BAD_APIC bool - depends on MK7 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 + depends on CPU_586TSC default y config X86_INTEL_USERCOPY bool - depends on MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 + depends on CPU_K7 || CPU_K8 || CPU_PENTIUMII || CPU_PENTIUMIII || CPU_PENTIUMM || CPU_PENTIUM4 default y config X86_USE_PPRO_CHECKSUM bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 + depends on !CPU_386 && !CPU_486 && !CPU_586 && !CPU_586TSC && !CPU_586MMX && !X86_ELAN && !CPU_CRUSOE default y config X86_USE_3DNOW bool - depends on MCYRIXIII || MK7 + depends on !CPU_INTEL && !CPU_K6 && !CPU_K8 && !X86_ELAN && !CPU_CRUSOE && !CPU_WINCHIP && !CPU_VIAC3_2 default y config X86_OOSTORE bool - depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR + depends on CPU_ONLY_WINCHIP && MTRR default y +config X86_4G + bool "4 GB kernel-space and 4 GB user-space virtual memory support" + help + This option is only useful for systems that have more than 1 GB + of RAM. + + The default kernel VM layout leaves 1 GB of virtual memory for + kernel-space mappings, and 3 GB of VM for user-space applications. + This option ups both the kernel-space VM and the user-space VM to + 4 GB. + + The cost of this option is additional TLB flushes done at + system-entry points that transition from user-mode into kernel-mode. + I.e. system calls and page faults, and IRQs that interrupt user-mode + code. There's also additional overhead to kernel operations that copy + memory to/from user-space. The overhead from this is hard to tell and + depends on the workload - it can be anything from no visible overhead + to 20-30% overhead. A good rule of thumb is to count with a runtime + overhead of 20%. + + The upside is the much increased kernel-space VM, which more than + quadruples the maximum amount of RAM supported. Kernels compiled with + this option boot on 64GB of RAM and still have more than 3.1 GB of + 'lowmem' left. Another bonus is that highmem IO bouncing decreases, + if used with drivers that still use bounce-buffers. + + There's also a 33% increase in user-space VM size - database + applications might see a boost from this. + + But the cost of the TLB flushes and the runtime overhead has to be + weighed against the bonuses offered by the larger VM spaces. The + dividing line depends on the actual workload - there might be 4 GB + systems that benefit from this option. Systems with less than 4 GB + of RAM will rarely see a benefit from this option - but it's not + out of question, the exact circumstances have to be considered. + +config X86_SWITCH_PAGETABLES + def_bool X86_4G + +config X86_4G_VM_LAYOUT + def_bool X86_4G + +config X86_UACCESS_INDIRECT + def_bool X86_4G + +config X86_HIGH_ENTRY + def_bool X86_4G + config HPET_TIMER bool "HPET Timer Support" help @@ -459,6 +508,16 @@ config NR_CPUS This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on SMP + default off + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with Intel Pentium 4 chips with HyperThreading at a + cost of slightly increased overhead in some places. If unsure say + N here. + config PREEMPT bool "Preemptible Kernel" help @@ -513,7 +572,7 @@ config X86_IO_APIC config X86_TSC bool - depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ + depends on !X86_NUMAQ && !CPU_386 && !CPU_486 && !CPU_586 && !X86_ELAN && !CPU_WINCHIPC6 default y config X86_MCE @@ -603,8 +662,6 @@ config MICROCODE To compile this driver as a module, choose M here: the module will be called microcode. - If you use modprobe or kmod you may also want to add the line - 'alias char-major-10-184 microcode' to your /etc/modules.conf file. config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" @@ -701,7 +758,7 @@ config X86_PAE # Common NUMA Features config NUMA bool "Numa Memory Allocation Support" - depends on SMP && HIGHMEM64G && (X86_PC || X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI && !ACPI_HT_ONLY)) + depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI)) default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT) @@ -709,8 +766,8 @@ config NUMA comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support" depends on X86_NUMAQ && (!HIGHMEM64G || !SMP) -comment "NUMA (Summit) requires SMP, 64GB highmem support, full ACPI" - depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI || ACPI_HT_ONLY) +comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" + depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI) config DISCONTIGMEM bool @@ -821,6 +878,19 @@ config BOOT_IOREMAP depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) default y +config REGPARM + bool "Use register arguments (EXPERIMENTAL)" + depends on EXPERIMENTAL + default n + help + Compile the kernel with -mregparm=3. This uses an different ABI + and passes the first three arguments of a function call in registers. + This will probably break binary only modules. + + This feature is only enabled for gcc-3.0 and later - earlier compilers + generate incorrect output with certain kernel constructs when + -mregparm=3 is used. + endmenu @@ -1149,6 +1219,15 @@ config HOTPLUG agent" (/sbin/hotplug) to load modules and set up software needed to use devices as you hotplug them. +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && HOTPLUG && EXPERIMENTAL + ---help--- + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. + + Say N. + source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" @@ -1177,6 +1256,19 @@ config DEBUG_KERNEL Say Y here if you are developing drivers or trying to debug and identify kernel problems. +config EARLY_PRINTK + bool "Early printk" if EMBEDDED + default y + help + Write kernel log output directly into the VGA buffer or to a serial + port. + + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. For normal operation + it is not recommended because it looks ugly and doesn't cooperate + with klogd/syslogd or the X server. You should normally N here, + unless you want to debug such a crash. + config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL @@ -1231,6 +1323,15 @@ config DEBUG_PAGEALLOC This results in a large slowdown, but helps to find certain types of memory corruptions. +config SPINLINE + bool "Spinlock inlining" + depends on DEBUG_KERNEL + help + This will change spinlocks from out of line to inline, making them + account cost to the callers in readprofile, rather than the lock + itself (as ".text.lock.filename"). This can be helpful for finding + the callers of locks. + config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM @@ -1247,20 +1348,208 @@ config DEBUG_INFO Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. +config LOCKMETER + bool "Kernel lock metering" + depends on SMP + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + config DEBUG_SPINLOCK_SLEEP bool "Sleep-inside-spinlock checking" help If you say Y here, various routines which may sleep will become very noisy if they are called with a spinlock held. +config KGDB + bool "Include kgdb kernel debugger" + depends on DEBUG_KERNEL + help + If you say Y here, the system will be compiled with the debug + option (-g) and a debugging stub will be included in the + kernel. This stub communicates with gdb on another (host) + computer via a serial port. The host computer should have + access to the kernel binary file (vmlinux) and a serial port + that is connected to the target machine. Gdb can be made to + configure the serial port or you can use stty and setserial to + do this. See the 'target' command in gdb. This option also + configures in the ability to request a breakpoint early in the + boot process. To request the breakpoint just include 'kgdb' + as a boot option when booting the target machine. The system + will then break as soon as it looks at the boot options. This + option also installs a breakpoint in panic and sends any + kernel faults to the debugger. For more information see the + Documentation/i386/kgdb/kgdb.txt file. + +choice + depends on KGDB + prompt "Debug serial port BAUD" + default KGDB_115200BAUD + help + Gdb and the kernel stub need to agree on the baud rate to be + used. Some systems (x86 family at this writing) allow this to + be configured. + +config KGDB_9600BAUD + bool "9600" + +config KGDB_19200BAUD + bool "19200" + +config KGDB_38400BAUD + bool "38400" + +config KGDB_57600BAUD + bool "57600" + +config KGDB_115200BAUD + bool "115200" +endchoice + +config KGDB_PORT + hex "hex I/O port address of the debug serial port" + depends on KGDB + default 3f8 + help + Some systems (x86 family at this writing) allow the port + address to be configured. The number entered is assumed to be + hex, don't put 0x in front of it. The standard address are: + COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx + will tell you what you have. It is good to test the serial + connection with a live system before trying to debug. + +config KGDB_IRQ + int "IRQ of the debug serial port" + depends on KGDB + default 4 + help + This is the irq for the debug port. If everything is working + correctly and the kernel has interrupts on a control C to the + port should cause a break into the kernel debug stub. + +config DEBUG_INFO + bool + depends on KGDB + default y + +config KGDB_MORE + bool "Add any additional compile options" + depends on KGDB + default n + help + Saying yes here turns on the ability to enter additional + compile options. + + +config KGDB_OPTIONS + depends on KGDB_MORE + string "Additional compile arguments" + default "-O1" + help + This option allows you enter additional compile options for + the whole kernel compile. Each platform will have a default + that seems right for it. For example on PPC "-ggdb -O1", and + for i386 "-O1". Note that by configuring KGDB "-g" is already + turned on. In addition, on i386 platforms + "-fomit-frame-pointer" is deleted from the standard compile + options. + +config NO_KGDB_CPUS + int "Number of CPUs" + depends on KGDB && SMP + default NR_CPUS + help + + This option sets the number of cpus for kgdb ONLY. It is used + to prune some internal structures so they look "nice" when + displayed with gdb. This is to overcome possibly larger + numbers that may have been entered above. Enter the real + number to get nice clean kgdb_info displays. + +config KGDB_TS + bool "Enable kgdb time stamp macros?" + depends on KGDB + default n + help + Kgdb event macros allow you to instrument your code with calls + to the kgdb event recording function. The event log may be + examined with gdb at a break point. Turning on this + capability also allows you to choose how many events to + keep. Kgdb always keeps the lastest events. + +choice + depends on KGDB_TS + prompt "Max number of time stamps to save?" + default KGDB_TS_128 + +config KGDB_TS_64 + bool "64" + +config KGDB_TS_128 + bool "128" + +config KGDB_TS_256 + bool "256" + +config KGDB_TS_512 + bool "512" + +config KGDB_TS_1024 + bool "1024" + +endchoice + +config STACK_OVERFLOW_TEST + bool "Turn on kernel stack overflow testing?" + depends on KGDB + default n + help + This option enables code in the front line interrupt handlers + to check for kernel stack overflow on interrupts and system + calls. This is part of the kgdb code on x86 systems. + +config KGDB_CONSOLE + bool "Enable serial console thru kgdb port" + depends on KGDB + default n + help + This option enables the command line "console=kgdb" option. + When the system is booted with this option in the command line + all kernel printk output is sent to gdb (as well as to other + consoles). For this to work gdb must be connected. For this + reason, this command line option will generate a breakpoint if + gdb has not yet connected. After the gdb continue command is + given all pent up console output will be printed by gdb on the + host machine. Neither this option, nor KGDB require the + serial driver to be configured. + +config KGDB_SYSRQ + bool "Turn on SysRq 'G' command to do a break?" + depends on KGDB + default y + help + This option includes an option in the SysRq code that allows + you to enter SysRq G which generates a breakpoint to the KGDB + stub. This will work if the keyboard is alive and can + interrupt the system. Because of constraints on when the + serial port interrupt can be enabled, this code may allow you + to interrupt the system before the serial port control C is + available. Just say yes here. + config FRAME_POINTER bool "Compile the kernel with frame pointers" + default KGDB help If you say Y here the resulting kernel image will be slightly larger and slower, but it will give very useful debugging information. If you don't debug the kernel, you can say N, but we may not be able to solve problems without frame pointers. +config MAGIC_SYSRQ + bool + depends on KGDB_SYSRQ + default y + config X86_FIND_SMP_CONFIG bool depends on X86_LOCAL_APIC || X86_VOYAGER --- linux-2.6.3-rc2/arch/i386/kernel/acpi/boot.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/i386/kernel/acpi/boot.c 2004-02-12 00:41:23.000000000 -0800 @@ -141,6 +141,10 @@ acpi_parse_lapic ( acpi_table_print_madt_entry(header); + /* no utility in registering a disabled processor */ + if (processor->flags.enabled == 0) + return 0; + mp_register_lapic ( processor->id, /* APIC ID */ processor->flags.enabled); /* Enabled? */ @@ -335,7 +339,7 @@ acpi_scan_rsdp ( * RSDP signature. */ for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) + if (strncmp((char *) __va(start + offset), "RSD PTR ", sig_len)) continue; return (start + offset); } @@ -372,6 +376,37 @@ static int __init acpi_parse_hpet(unsign } #endif +/* detect the location of the ACPI PM Timer */ +#ifdef CONFIG_X86_PM_TIMER +extern u32 pmtmr_ioport; + +static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) +{ + struct fadt_descriptor_rev2 *fadt =0; + + fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size); + if(!fadt) { + printk(KERN_WARNING PREFIX "Unable to map FADT\n"); + return 0; + } + + if (fadt->revision >= FADT2_REVISION_ID) { + /* FADT rev. 2 */ + if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO) + return 0; + + pmtmr_ioport = fadt->xpm_tmr_blk.address; + } else { + /* FADT rev. 1 */ + pmtmr_ioport = fadt->V1_pm_tmr_blk; + } + if (pmtmr_ioport) + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport); + return 0; +} +#endif + + unsigned long __init acpi_find_rsdp (void) { @@ -444,6 +479,10 @@ acpi_boot_init (void) return result; } +#ifdef CONFIG_X86_PM_TIMER + acpi_table_parse(ACPI_FADT, acpi_parse_fadt); +#endif + #ifdef CONFIG_X86_LOCAL_APIC /* --- linux-2.6.3-rc2/arch/i386/kernel/asm-offsets.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/asm-offsets.c 2004-02-12 00:41:23.000000000 -0800 @@ -4,9 +4,11 @@ * to extract and format the required data. */ +#include #include #include #include "sigframe.h" +#include #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -28,4 +30,17 @@ void foo(void) DEFINE(RT_SIGFRAME_sigcontext, offsetof (struct rt_sigframe, uc.uc_mcontext)); + DEFINE(TI_task, offsetof (struct thread_info, task)); + DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain)); + DEFINE(TI_flags, offsetof (struct thread_info, flags)); + DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count)); + DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit)); + DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack)); + DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack)); + DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd)); + + DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr, __fix_to_virt(FIX_ENTRY_TRAMPOLINE_0)); + DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL)); + DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(task_thread_db7, offsetof (struct task_struct, thread.debugreg[7])); } --- linux-2.6.3-rc2/arch/i386/kernel/cpu/centaur.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/i386/kernel/cpu/centaur.c 2004-02-12 00:40:50.000000000 -0800 @@ -246,7 +246,15 @@ static void __init winchip2_protect_mcr( lo&=~0x1C0; /* blank bits 8-6 */ wrmsr(MSR_IDT_MCR_CTRL, lo, hi); } -#endif +#endif /* CONFIG_X86_OOSTORE */ + +#define ACE_PRESENT (1 << 6) +#define ACE_ENABLED (1 << 7) +#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ + +#define RNG_PRESENT (1 << 2) +#define RNG_ENABLED (1 << 3) +#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ static void __init init_c3(struct cpuinfo_x86 *c) { @@ -254,6 +262,24 @@ static void __init init_c3(struct cpuinf /* Test for Centaur Extended Feature Flags presence */ if (cpuid_eax(0xC0000000) >= 0xC0000001) { + u32 tmp = cpuid_edx(0xC0000001); + + /* enable ACE unit, if present and disabled */ + if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { + rdmsr (MSR_VIA_FCR, lo, hi); + lo |= ACE_FCR; /* enable ACE unit */ + wrmsr (MSR_VIA_FCR, lo, hi); + printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); + } + + /* enable RNG unit, if present and disabled */ + if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { + rdmsr (MSR_VIA_RNG, lo, hi); + lo |= RNG_ENABLE; /* enable RNG unit */ + wrmsr (MSR_VIA_RNG, lo, hi); + printk(KERN_INFO "CPU: Enabled h/w RNG\n"); + } + /* store Centaur Extended Feature Flags as * word 5 of the CPU capability bit array */ --- linux-2.6.3-rc2/arch/i386/kernel/cpu/common.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/cpu/common.c 2004-02-12 00:41:23.000000000 -0800 @@ -514,12 +514,16 @@ void __init cpu_init (void) set_tss_desc(cpu,t); cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); - load_LDT(&init_mm.context); + if (cpu) + load_LDT(&init_mm.context); /* Set up doublefault TSS pointer in the GDT */ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff; + if (cpu) + trap_init_virtual_GDT(); + /* Clear %fs and %gs. */ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); --- linux-2.6.3-rc2/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c 2004-02-12 00:39:25.000000000 -0800 @@ -28,30 +28,34 @@ * with 'Suspend Modulation OFF Count Register' * and 'Suspend Modulation ON Count Register'. * These registers are 8bit counters that represent the number of - * 32us intervals which the SUSP# pin is asserted/de-asserted to the - * processor. + * 32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF) + * to the processor. * * These counters define a ratio which is the effective frequency * of operation of the system. * - * On Count + * OFF Count * F_eff = Fgx * ---------------------- - * On Count + Off Count + * OFF Count + ON Count * * 0 <= On Count, Off Count <= 255 * * From these limits, we can get register values * - * on_duration + off_duration <= MAX_DURATION - * off_duration = on_duration * (stock_freq - freq) / freq + * off_duration + on_duration <= MAX_DURATION + * on_duration = off_duration * (stock_freq - freq) / freq * - * on_duration = (freq * DURATION) / stock_freq - * off_duration = DURATION - on_duration + * off_duration = (freq * DURATION) / stock_freq + * on_duration = DURATION - off_duration * * *--------------------------------------------------------------------------- * * ChangeLog: + * Dec. 12, 2003 Hiroshi Miura + * - fix on/off register mistake + * - fix cpu_khz calc when it stops cpu modulation. + * * Dec. 11, 2002 Hiroshi Miura * - rewrite for Cyrix MediaGX Cx5510/5520 and * NatSemi Geode Cs5530(A). @@ -233,13 +237,13 @@ static unsigned int gx_validate_speed(un int old_tmp_freq = stock_freq; int tmp_freq; - *on_duration=1; - *off_duration=0; + *off_duration=1; + *on_duration=0; for (i=max_duration; i>0; i--) { - tmp_on = ((khz * i) / stock_freq) & 0xff; - tmp_off = i - tmp_on; - tmp_freq = (stock_freq * tmp_on) / i; + tmp_off = ((khz * i) / stock_freq) & 0xff; + tmp_on = i - tmp_off; + tmp_freq = (stock_freq * tmp_off) / i; /* if this relation is closer to khz, use this. If it's equal, * prefer it, too - lower latency */ if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) { @@ -273,42 +277,37 @@ static void gx_set_cpuspeed(unsigned int freqs.new = new_khz; - if (new_khz == stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ - local_irq_save(flags); - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, (gx_params->pci_suscfg & ~(SUSMOD))); - pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &(gx_params->pci_suscfg)); - local_irq_restore(flags); - dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n"); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - return; - } - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - local_irq_save(flags); - switch (gx_params->cs55x0->device) { - case PCI_DEVICE_ID_CYRIX_5530_LEGACY: - pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; - /* FIXME: need to test other values -- Zwane,Miura */ - pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */ - pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ - pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); - if (gx_params->pci_rev < 0x10) { /* CS5530(rev 1.2, 1.3) */ + if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ + switch (gx_params->cs55x0->device) { + case PCI_DEVICE_ID_CYRIX_5530_LEGACY: + pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; + /* FIXME: need to test other values -- Zwane,Miura */ + pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */ + pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ + pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); + + if (gx_params->pci_rev < 0x10) { /* CS5530(rev 1.2, 1.3) */ + suscfg = gx_params->pci_suscfg | SUSMOD; + } else { /* CS5530A,B.. */ + suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; + } + break; + case PCI_DEVICE_ID_CYRIX_5520: + case PCI_DEVICE_ID_CYRIX_5510: suscfg = gx_params->pci_suscfg | SUSMOD; - } else { /* CS5530A,B.. */ - suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; + default: + local_irq_restore(flags); + dprintk("fatal: try to set unknown chipset.\n"); + return; } - break; - case PCI_DEVICE_ID_CYRIX_5520: - case PCI_DEVICE_ID_CYRIX_5510: - suscfg = gx_params->pci_suscfg | SUSMOD; - break; - default: - local_irq_restore(flags); - dprintk("fatal: try to set unknown chipset.\n"); - return; + } else { + suscfg = gx_params->pci_suscfg & ~(SUSMOD); + gx_params->off_duration = 0; + gx_params->on_duration = 0; + dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n"); } pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); --- linux-2.6.3-rc2/arch/i386/kernel/cpu/cpufreq/Kconfig 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/kernel/cpu/cpufreq/Kconfig 2004-02-12 00:40:18.000000000 -0800 @@ -54,7 +54,7 @@ config X86_ACPI_CPUFREQ_PROC_INTF config ELAN_CPUFREQ tristate "AMD Elan" - depends on CPU_FREQ_TABLE && MELAN + depends on CPU_FREQ_TABLE && X86_ELAN ---help--- This adds the CPUFreq driver for AMD Elan SC400 and SC410 processors. --- linux-2.6.3-rc2/arch/i386/kernel/cpu/cpufreq/longhaul.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/cpu/cpufreq/longhaul.c 2004-02-12 00:39:25.000000000 -0800 @@ -63,11 +63,12 @@ static struct cpufreq_frequency_table *l static unsigned int calc_speed (int mult, int fsb) { - int mhz; - mhz = (mult/10)*fsb; + int khz; + khz = (mult/10)*fsb; if (mult%10) - mhz += fsb/2; - return mhz; + khz += fsb/2; + khz *= 1000; + return khz; } @@ -253,7 +254,7 @@ static int __init longhaul_get_ranges (v highest_speed = calc_speed (maxmult, fsb); lowest_speed = calc_speed (minmult,fsb); dprintk (KERN_INFO PFX "FSB: %dMHz Lowestspeed=%dMHz Highestspeed=%dMHz\n", - fsb, lowest_speed, highest_speed); + fsb, lowest_speed/1000, highest_speed/1000); longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL); if(!longhaul_table) @@ -267,7 +268,7 @@ static int __init longhaul_get_ranges (v if (ratio > maxmult || ratio < minmult) continue; longhaul_table[k].frequency = calc_speed (ratio, fsb); - longhaul_table[k].index = (j << 8); + longhaul_table[k].index = j; k++; } --- linux-2.6.3-rc2/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-02-12 00:40:06.000000000 -0800 @@ -57,8 +57,7 @@ static int cpufreq_p4_setdc(unsigned int u32 l, h; cpumask_t cpus_allowed, affected_cpu_map; struct cpufreq_freqs freqs; - int hyperthreading = 0; - int sibling = 0; + int j; if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) @@ -68,13 +67,10 @@ static int cpufreq_p4_setdc(unsigned int cpus_allowed = current->cpus_allowed; /* only run on CPU to be set, or on its sibling */ - affected_cpu_map = cpumask_of_cpu(cpu); -#ifdef CONFIG_X86_HT - hyperthreading = ((cpu_has_ht) && (smp_num_siblings == 2)); - if (hyperthreading) { - sibling = cpu_sibling_map[cpu]; - cpu_set(sibling, affected_cpu_map); - } +#ifdef CONFIG_SMP + affected_cpu_map = cpu_sibling_map[cpu]; +#else + affected_cpu_map = cpumask_of_cpu(cpu); #endif set_cpus_allowed(current, affected_cpu_map); BUG_ON(!cpu_isset(smp_processor_id(), affected_cpu_map)); @@ -97,11 +93,11 @@ static int cpufreq_p4_setdc(unsigned int /* notifiers */ freqs.old = stock_freq * l / 8; freqs.new = stock_freq * newstate / 8; - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - if (hyperthreading) { - freqs.cpu = sibling; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + for_each_cpu(j) { + if (cpu_isset(j, affected_cpu_map)) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } } rdmsr(MSR_IA32_THERM_STATUS, l, h); @@ -132,10 +128,11 @@ static int cpufreq_p4_setdc(unsigned int set_cpus_allowed(current, cpus_allowed); /* notifiers */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - if (hyperthreading) { - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + for_each_cpu(j) { + if (cpu_isset(j, affected_cpu_map)) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } } return 0; @@ -246,7 +243,7 @@ static int cpufreq_p4_cpu_init(struct cp static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy) { cpufreq_frequency_table_put_attr(policy->cpu); - return cpufreq_p4_setdc(policy->cpu, DC_DISABLE); + return 0; } static struct freq_attr* p4clockmod_attr[] = { --- linux-2.6.3-rc2/arch/i386/kernel/cpu/cpufreq/powernow-k7.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/cpu/cpufreq/powernow-k7.c 2004-02-12 00:39:25.000000000 -0800 @@ -91,18 +91,13 @@ static int check_powernow(void) struct cpuinfo_x86 *c = cpu_data; unsigned int maxei, eax, ebx, ecx, edx; - if (c->x86_vendor != X86_VENDOR_AMD) { - printk (KERN_INFO PFX "AMD processor not detected.\n"); - return 0; - } - - if (c->x86 !=6) { + if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { +#ifdef MODULE printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n"); +#endif return 0; } - printk (KERN_INFO PFX "AMD K7 CPU detected.\n"); - if ((c->x86_model == 6) && (c->x86_mask == 0)) { printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n"); have_a0 = 1; --- linux-2.6.3-rc2/arch/i386/kernel/cpu/cpufreq/powernow-k8.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/cpu/cpufreq/powernow-k8.c 2004-02-12 00:39:25.000000000 -0800 @@ -31,7 +31,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.00.08 - September 26, 2003" +#define VERSION "version 1.00.08a" #include "powernow-k8.h" #ifdef CONFIG_PREEMPT @@ -44,10 +44,11 @@ static u32 numps; /* number of p-states, static u32 rvo; /* ramp voltage offset, from PSB */ static u32 irt; /* isochronous relief time, from PSB */ static u32 vidmvs; /* usable value calculated from mvs, from PSB */ -struct pst_s *ppst; /* array of p states, valid for this part */ static u32 currvid; /* keep track of the current fid / vid */ static u32 currfid; +static struct cpufreq_frequency_table *powernow_table; + /* The PSB table supplied by BIOS allows for the definition of the number of p-states that can be used when running on a/c, and the number of p-states @@ -71,30 +72,12 @@ so this is not actually a restriction. static u32 batps; /* limit on the number of p states when on battery */ /* - set by BIOS in the PSB/PST */ -static struct cpufreq_driver cpufreq_amd64_driver = { - .verify = powernowk8_verify, - .target = powernowk8_target, - .init = powernowk8_cpu_init, - .name = "cpufreq-amd64", - .owner = THIS_MODULE, -}; - -#define SEARCH_UP 1 -#define SEARCH_DOWN 0 - -/* Return a frequency in MHz, given an input fid */ -u32 -find_freq_from_fid(u32 fid) + /* Return a frequency in MHz, given an input fid */ +static u32 find_freq_from_fid(u32 fid) { - return 800 + (fid * 100); + return 800 + (fid * 100); } -/* Return a fid matching an input frequency in MHz */ -static u32 -find_fid_from_freq(u32 freq) -{ - return (freq - 800) / 100; -} /* Return the vco fid for an input fid */ static u32 @@ -107,56 +90,27 @@ convert_fid_to_vco_fid(u32 fid) } } -/* Sort the fid/vid frequency table into ascending order by fid. The spec */ -/* implies that it will be sorted by BIOS, but, it only implies it, and I */ -/* prefer not to trust when I can check. */ -/* Yes, it is a simple bubble sort, but the PST is really small, so the */ -/* choice of algorithm is pretty irrelevant. */ -static inline void -sort_pst(struct pst_s *ppst, u32 numpstates) -{ - u32 i; - u8 tempfid; - u8 tempvid; - int swaps = 1; - - while (swaps) { - swaps = 0; - for (i = 0; i < (numpstates - 1); i++) { - if (ppst[i].fid > ppst[i + 1].fid) { - swaps = 1; - tempfid = ppst[i].fid; - tempvid = ppst[i].vid; - ppst[i].fid = ppst[i + 1].fid; - ppst[i].vid = ppst[i + 1].vid; - ppst[i + 1].fid = tempfid; - ppst[i + 1].vid = tempvid; - } - } - } - - return; -} - -/* Return 1 if the pending bit is set. Unless we are actually just told the */ -/* processor to transition a state, seeing this bit set is really bad news. */ +/* + * Return 1 if the pending bit is set. Unless we are actually just told the + * processor to transition a state, seeing this bit set is really bad news. + */ static inline int pending_bit_stuck(void) { - u32 lo; - u32 hi; + u32 lo, hi; rdmsr(MSR_FIDVID_STATUS, lo, hi); return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; } -/* Update the global current fid / vid values from the status msr. Returns 1 */ -/* on error. */ +/* + * Update the global current fid / vid values from the status msr. Returns 1 + * on error. + */ static int query_current_values_with_pending_wait(void) { - u32 lo; - u32 hi; + u32 lo, hi; u32 i = 0; lo = MSR_S_LO_CHANGE_PENDING; @@ -271,9 +225,11 @@ write_new_vid(u32 vid) return 0; } -/* Reduce the vid by the max of step or reqvid. */ -/* Decreasing vid codes represent increasing voltages : */ -/* vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of 0x1f is off. */ +/* + * Reduce the vid by the max of step or reqvid. + * Decreasing vid codes represent increasing voltages: + * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of 0x1f is off. + */ static int decrease_vid_code_by_step(u32 reqvid, u32 step) { @@ -316,8 +272,10 @@ transition_fid_vid(u32 reqfid, u32 reqvi return 0; } -/* Phase 1 - core voltage transition ... setup appropriate voltage for the */ -/* fid transition. */ +/* + * Phase 1 - core voltage transition ... setup appropriate voltage for the + * fid transition. + */ static inline int core_voltage_pre_transition(u32 reqvid) { @@ -500,7 +458,9 @@ check_supported_cpu(void) } if (c->x86_vendor != X86_VENDOR_AMD) { +#ifdef MODULE printk(KERN_INFO PFX "Not an AMD processor\n"); +#endif return 0; } @@ -533,20 +493,59 @@ check_supported_cpu(void) return 0; } - printk(KERN_INFO PFX "Found AMD Athlon 64 / Opteron processor " - "supporting p-state transitions\n"); - + printk(KERN_INFO PFX "Found AMD64 processor supporting PowerNow (" VERSION ")\n"); return 1; } +static int check_pst_table(struct pst_s *pst, u8 maxvid) +{ + unsigned int j; + u8 lastfid = 0xFF; + + for (j = 0; j < numps; j++) { + if (pst[j].vid > LEAST_VID) { + printk(KERN_ERR PFX "vid %d invalid : 0x%x\n", j, pst[j].vid); + return -EINVAL; + } + if (pst[j].vid < rvo) { /* vid + rvo >= 0 */ + printk(KERN_ERR PFX + "BIOS error - 0 vid exceeded with pstate %d\n", + j); + return -ENODEV; + } + if (pst[j].vid < maxvid + rvo) { /* vid + rvo >= maxvid */ + printk(KERN_ERR PFX + "BIOS error - maxvid exceeded with pstate %d\n", + j); + return -ENODEV; + } + if ((pst[j].fid > MAX_FID) + || (pst[j].fid & 1) + || (pst[j].fid < HI_FID_TABLE_BOTTOM)){ + printk(KERN_ERR PFX "fid %d invalid : 0x%x\n", j, pst[j].fid); + return -EINVAL; + } + if (pst[j].fid < lastfid) + lastfid = pst[j].fid; + } + if (lastfid & 1) { + printk(KERN_ERR PFX "lastfid invalid\n"); + return -EINVAL; + } + if (lastfid > LO_FID_TABLE_TOP) { + printk(KERN_INFO PFX "first fid not from lo freq table\n"); + } + + return 0; +} + /* Find and validate the PSB/PST table in BIOS. */ static inline int find_psb_table(void) { struct psb_s *psb; struct pst_s *pst; - unsigned i, j; - u32 lastfid; + unsigned int i, j; u32 mvs; u8 maxvid; @@ -573,33 +572,19 @@ find_psb_table(void) } vstable = psb->voltagestabilizationtime; - printk(KERN_INFO PFX "voltage stable time: %d (units 20us)\n", - vstable); - dprintk(KERN_DEBUG PFX "flags2: 0x%x\n", psb->flags2); rvo = psb->flags2 & 3; irt = ((psb->flags2) >> 2) & 3; mvs = ((psb->flags2) >> 4) & 3; vidmvs = 1 << mvs; batps = ((psb->flags2) >> 6) & 3; - printk(KERN_INFO PFX "p states on battery: %d ", batps); - switch (batps) { - case 0: - printk("- all available\n"); - break; - case 1: - printk("- only the minimum\n"); - break; - case 2: - printk("- only the 2 lowest\n"); - break; - case 3: - printk("- only the 3 lowest\n"); - break; - } - printk(KERN_INFO PFX "ramp voltage offset: %d\n", rvo); - printk(KERN_INFO PFX "isochronous relief time: %d\n", irt); - printk(KERN_INFO PFX "maximum voltage step: %d\n", mvs); + + printk(KERN_INFO PFX "voltage stable in %d usec", vstable * 20); + if (batps) + printk(", only %d lowest states on battery", batps); + printk(", ramp voltage offset: %d", rvo); + printk(", isochronous relief time: %d", irt); + printk(", maximum voltage step: %d\n", mvs); dprintk(KERN_DEBUG PFX "numpst: 0x%x\n", psb->numpst); if (psb->numpst != 1) { @@ -610,14 +595,13 @@ find_psb_table(void) dprintk(KERN_DEBUG PFX "cpuid: 0x%x\n", psb->cpuid); plllock = psb->plllocktime; - printk(KERN_INFO PFX "pll lock time: 0x%x\n", plllock); + printk(KERN_INFO PFX "pll lock time: 0x%x, ", plllock); maxvid = psb->maxvid; - printk(KERN_INFO PFX "maxfid: 0x%x\n", psb->maxfid); - printk(KERN_INFO PFX "maxvid: 0x%x\n", maxvid); + printk("maxfid 0x%x (%d MHz), maxvid 0x%x\n", + psb->maxfid, find_freq_from_fid(psb->maxfid), maxvid); numps = psb->numpstates; - printk(KERN_INFO PFX "numpstates: 0x%x\n", numps); if (numps < 2) { printk(KERN_ERR BFX "no p states to transition\n"); return -ENODEV; @@ -636,78 +620,41 @@ find_psb_table(void) "%d p-states\n", numps); } - if ((numps <= 1) || (batps <= 1)) { + if (numps <= 1) { printk(KERN_ERR PFX "only 1 p-state to transition\n"); return -ENODEV; } - ppst = kmalloc(sizeof (struct pst_s) * numps, GFP_KERNEL); - if (!ppst) { - printk(KERN_ERR PFX "ppst memory alloc failure\n"); - return -ENOMEM; - } - pst = (struct pst_s *) (psb + 1); - for (j = 0; j < numps; j++) { - ppst[j].fid = pst[j].fid; - ppst[j].vid = pst[j].vid; - printk(KERN_INFO PFX - " %d : fid 0x%x, vid 0x%x\n", j, - ppst[j].fid, ppst[j].vid); - } - sort_pst(ppst, numps); - - lastfid = ppst[0].fid; - if (lastfid > LO_FID_TABLE_TOP) - printk(KERN_INFO BFX "first fid not in lo freq tbl\n"); - - if ((lastfid > MAX_FID) || (lastfid & 1) || (ppst[0].vid > LEAST_VID)) { - printk(KERN_ERR BFX "first fid/vid bad (0x%x - 0x%x)\n", - lastfid, ppst[0].vid); - kfree(ppst); - return -ENODEV; - } + if (check_pst_table(pst, maxvid)) + return -EINVAL; - for (j = 1; j < numps; j++) { - if ((lastfid >= ppst[j].fid) - || (ppst[j].fid & 1) - || (ppst[j].fid < HI_FID_TABLE_BOTTOM) - || (ppst[j].fid > MAX_FID) - || (ppst[j].vid > LEAST_VID)) { - printk(KERN_ERR BFX - "invalid fid/vid in pst(%x %x)\n", - ppst[j].fid, ppst[j].vid); - kfree(ppst); - return -ENODEV; - } - lastfid = ppst[j].fid; + powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (numps + 1)), GFP_KERNEL); + if (!powernow_table) { + printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); + return -ENOMEM; } for (j = 0; j < numps; j++) { - if (ppst[j].vid < rvo) { /* vid+rvo >= 0 */ - printk(KERN_ERR BFX - "0 vid exceeded with pstate %d\n", j); - kfree(ppst); - return -ENODEV; - } - if (ppst[j].vid < maxvid+rvo) { /* vid+rvo >= maxvid */ - printk(KERN_ERR BFX - "maxvid exceeded with pstate %d\n", j); - kfree(ppst); - return -ENODEV; - } + printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", j, + pst[j].fid, find_freq_from_fid(pst[j].fid), pst[j].vid); + powernow_table[j].index = pst[j].fid; /* lower 8 bits */ + powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ + powernow_table[j].frequency = find_freq_from_fid(pst[j].fid); } + powernow_table[numps].frequency = CPUFREQ_TABLE_END; + powernow_table[numps].index = 0; if (query_current_values_with_pending_wait()) { - kfree(ppst); + kfree(powernow_table); return -EIO; } - printk(KERN_INFO PFX "currfid 0x%x, currvid 0x%x\n", - currfid, currvid); + printk(KERN_INFO PFX "currfid 0x%x (%d MHz), currvid 0x%x\n", + currfid, find_freq_from_fid(currfid), currvid); for (j = 0; j < numps; j++) - if ((ppst[j].fid==currfid) && (ppst[j].vid==currvid)) + if ((pst[j].fid==currfid) && (pst[j].vid==currvid)) return 0; printk(KERN_ERR BFX "currfid/vid do not match PST, ignoring\n"); @@ -718,112 +665,22 @@ find_psb_table(void) return -ENODEV; } -/* Converts a frequency (that might not necessarily be a multiple of 200) */ -/* to a fid. */ -static u32 -find_closest_fid(u32 freq, int searchup) -{ - if (searchup == SEARCH_UP) - freq += MIN_FREQ_RESOLUTION - 1; - - freq = (freq / MIN_FREQ_RESOLUTION) * MIN_FREQ_RESOLUTION; - - if (freq < MIN_FREQ) - freq = MIN_FREQ; - else if (freq > MAX_FREQ) - freq = MAX_FREQ; - - return find_fid_from_freq(freq); -} - -static int -find_match(u32 * ptargfreq, u32 * pmin, u32 * pmax, int searchup, u32 * pfid, - u32 * pvid) -{ - u32 availpstates = batps; - u32 targfid = find_closest_fid(*ptargfreq, searchup); - u32 minfid = find_closest_fid(*pmin, SEARCH_DOWN); - u32 maxfid = find_closest_fid(*pmax, SEARCH_UP); - u32 minidx = 0; - u32 maxidx = availpstates - 1; - u32 targidx = 0xffffffff; - int i; - - dprintk(KERN_DEBUG PFX "find match: freq %d MHz, min %d, max %d\n", - *ptargfreq, *pmin, *pmax); - - /* Restrict values to the frequency choices in the PST */ - if (minfid < ppst[0].fid) - minfid = ppst[0].fid; - if (maxfid > ppst[maxidx].fid) - maxfid = ppst[maxidx].fid; - - /* Find appropriate PST index for the minimim fid */ - for (i = 0; i < (int) availpstates; i++) { - if (minfid >= ppst[i].fid) - minidx = i; - } - - /* Find appropriate PST index for the maximum fid */ - for (i = availpstates - 1; i >= 0; i--) { - if (maxfid <= ppst[i].fid) - maxidx = i; - } - - if (minidx > maxidx) - maxidx = minidx; - - /* Frequency ids are now constrained by limits matching PST entries */ - minfid = ppst[minidx].fid; - maxfid = ppst[maxidx].fid; - - /* Limit the target frequency to these limits */ - if (targfid < minfid) - targfid = minfid; - else if (targfid > maxfid) - targfid = maxfid; - - /* Find the best target index into the PST, contrained by the range */ - if (searchup == SEARCH_UP) { - for (i = maxidx; i >= (int) minidx; i--) { - if (targfid <= ppst[i].fid) - targidx = i; - } - } else { - for (i = minidx; i <= (int) maxidx; i++) { - if (targfid >= ppst[i].fid) - targidx = i; - } - } - - if (targidx == 0xffffffff) { - printk(KERN_ERR PFX "could not find target\n"); - return 1; - } - - *pmin = find_freq_from_fid(minfid); - *pmax = find_freq_from_fid(maxfid); - *ptargfreq = find_freq_from_fid(ppst[targidx].fid); - - if (pfid) - *pfid = ppst[targidx].fid; - if (pvid) - *pvid = ppst[targidx].vid; - - return 0; -} - /* Take a frequency, and issue the fid/vid transition command */ static inline int -transition_frequency(u32 * preq, u32 * pmin, u32 * pmax, u32 searchup) +transition_frequency(unsigned int index) { u32 fid; u32 vid; int res; struct cpufreq_freqs freqs; - if (find_match(preq, pmin, pmax, searchup, &fid, &vid)) - return 1; + /* fid are the lower 8 bits of the index we stored into + * the cpufreq frequency table in find_psb_table, vid are + * the upper 8 bits. + */ + + fid = powernow_table[index].index & 0xFF; + vid = (powernow_table[index].index & 0xFF00) >> 8; dprintk(KERN_DEBUG PFX "table matched fid 0x%x, giving vid 0x%x\n", fid, vid); @@ -867,14 +724,7 @@ powernowk8_target(struct cpufreq_policy { u32 checkfid = currfid; u32 checkvid = currvid; - u32 reqfreq = targfreq / 1000; - u32 minfreq = pol->min / 1000; - u32 maxfreq = pol->max / 1000; - - if (ppst == 0) { - printk(KERN_ERR PFX "targ: ppst 0\n"); - return -ENODEV; - } + unsigned int newstate; if (pending_bit_stuck()) { printk(KERN_ERR PFX "drv targ fail: change pending bit set\n"); @@ -896,9 +746,10 @@ powernowk8_target(struct cpufreq_policy checkfid, currfid, checkvid, currvid); } - if (transition_frequency(&reqfreq, &minfreq, &maxfreq, - relation == - CPUFREQ_RELATION_H ? SEARCH_UP : SEARCH_DOWN)) + if (cpufreq_frequency_table_target(pol, powernow_table, targfreq, relation, &newstate)) + return -EINVAL; + + if (transition_frequency(newstate)) { printk(KERN_ERR PFX "transition frequency failed\n"); return 1; @@ -913,36 +764,12 @@ powernowk8_target(struct cpufreq_policy static int powernowk8_verify(struct cpufreq_policy *pol) { - u32 min = pol->min / 1000; - u32 max = pol->max / 1000; - u32 targ = min; - int res; - - if (ppst == 0) { - printk(KERN_ERR PFX "verify - ppst 0\n"); - return -ENODEV; - } - if (pending_bit_stuck()) { printk(KERN_ERR PFX "failing verify, change pending bit set\n"); return -EIO; } - dprintk(KERN_DEBUG PFX - "ver: cpu%d, min %d, max %d, cur %d, pol %d\n", pol->cpu, - pol->min, pol->max, pol->cur, pol->policy); - - if (pol->cpu != 0) { - printk(KERN_ERR PFX "verify - cpu not 0\n"); - return -ENODEV; - } - - res = find_match(&targ, &min, &max, SEARCH_DOWN, 0, 0); - if (!res) { - pol->min = min * 1000; - pol->max = max * 1000; - } - return res; + return cpufreq_frequency_table_verify(pol, powernow_table); } /* per CPU init entry point to the driver */ @@ -968,10 +795,11 @@ powernowk8_cpu_init(struct cpufreq_polic dprintk(KERN_DEBUG PFX "policy current frequency %d kHz\n", pol->cur); /* min/max the cpu is capable of */ - pol->cpuinfo.min_freq = 1000 * find_freq_from_fid(ppst[0].fid); - pol->cpuinfo.max_freq = 1000 * find_freq_from_fid(ppst[numps-1].fid); - pol->min = 1000 * find_freq_from_fid(ppst[0].fid); - pol->max = 1000 * find_freq_from_fid(ppst[batps - 1].fid); + if (cpufreq_frequency_table_cpuinfo(pol, powernow_table)) { + printk(KERN_ERR PFX "invalid powernow_table\n"); + kfree(powernow_table); + return -EINVAL; + } printk(KERN_INFO PFX "cpu_init done, current fid 0x%x, vid 0x%x\n", currfid, currvid); @@ -979,14 +807,33 @@ powernowk8_cpu_init(struct cpufreq_polic return 0; } +static int __exit powernowk8_cpu_exit (struct cpufreq_policy *pol) +{ + if (pol->cpu != 0) + return -EINVAL; + + if (powernow_table) + kfree(powernow_table); + + return 0; +} + +static struct cpufreq_driver cpufreq_amd64_driver = { + .verify = powernowk8_verify, + .target = powernowk8_target, + .init = powernowk8_cpu_init, + .exit = powernowk8_cpu_exit, + .name = "powernow-k8", + .owner = THIS_MODULE, +}; + + /* driver entry point for init */ static int __init powernowk8_init(void) { int rc; - printk(KERN_INFO PFX VERSION "\n"); - if (check_supported_cpu() == 0) return -ENODEV; @@ -996,7 +843,6 @@ powernowk8_init(void) if (pending_bit_stuck()) { printk(KERN_ERR PFX "powernowk8_init fail, change pending bit set\n"); - kfree(ppst); return -EIO; } @@ -1010,7 +856,6 @@ powernowk8_exit(void) dprintk(KERN_INFO PFX "powernowk8_exit\n"); cpufreq_unregister_driver(&cpufreq_amd64_driver); - kfree(ppst); } MODULE_AUTHOR("Paul Devriendt "); --- linux-2.6.3-rc2/arch/i386/kernel/cpu/cpufreq/powernow-k8.h 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/i386/kernel/cpu/cpufreq/powernow-k8.h 2004-02-12 00:39:25.000000000 -0800 @@ -120,7 +120,3 @@ struct pst_s { static inline int core_voltage_pre_transition(u32 reqvid); static inline int core_voltage_post_transition(u32 reqvid); static inline int core_frequency_transition(u32 reqfid); -static int powernowk8_verify(struct cpufreq_policy *pol); -static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, - unsigned relation); -static int __init powernowk8_cpu_init(struct cpufreq_policy *pol); --- linux-2.6.3-rc2/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c 2004-02-12 00:39:25.000000000 -0800 @@ -207,17 +207,55 @@ unsigned int speedstep_detect_processor if (c->x86_model != 2) return 0; - if ((c->x86_mask != 4) && /* B-stepping [M-P4-M] */ - (c->x86_mask != 7) && /* C-stepping [M-P4-M] */ - (c->x86_mask != 9)) /* D-stepping [M-P4-M or M-P4/533] */ - return 0; - ebx = cpuid_ebx(0x00000001); ebx &= 0x000000FF; - if ((ebx != 0x0e) && (ebx != 0x0f)) - return 0; - return SPEEDSTEP_PROCESSOR_P4M; + dprintk(KERN_INFO "ebx value is %x, x86_mask is %x\n", ebx, c->86_mask); + + switch (c->x86_mask) { + case 4: + /* + * B-stepping [M-P4-M] + * sample has ebx = 0x0f, production has 0x0e. + */ + if ((ebx == 0x0e) || (ebx == 0x0f)) + return SPEEDSTEP_PROCESSOR_P4M; + break; + case 7: + /* + * C-stepping [M-P4-M] + * needs to have ebx=0x0e, else it's a celeron: + * cf. 25130917.pdf / page 7, footnote 5 even + * though 25072120.pdf / page 7 doesn't say + * samples are only of B-stepping... + */ + if (ebx == 0x0e) + return SPEEDSTEP_PROCESSOR_P4M; + break; + case 9: + /* + * D-stepping [M-P4-M or M-P4/533] + * + * this is totally strange: CPUID 0x0F29 is + * used by M-P4-M, M-P4/533 and(!) Celeron CPUs. + * The latter need to be sorted out as they don't + * support speedstep. + * Celerons with CPUID 0x0F29 may have either + * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything + * specific. + * M-P4-Ms may have either ebx=0xe or 0xf [see above] + * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] + * So, how to distinguish all those processors with + * ebx=0xf? I don't know. Sort them out, and wait + * for someone to complain. + */ + if (ebx == 0x0e) + return SPEEDSTEP_PROCESSOR_P4M; + break; + default: + break; + } + return 0; } switch (c->x86_model) { --- linux-2.6.3-rc2/arch/i386/kernel/cpu/intel.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/kernel/cpu/intel.c 2004-02-12 00:41:23.000000000 -0800 @@ -10,6 +10,7 @@ #include #include #include +#include #include "cpu.h" @@ -19,8 +20,6 @@ #include #endif -extern int trap_init_f00f_bug(void); - #ifdef CONFIG_X86_INTEL_USERCOPY /* * Alignment at which movsl is preferred for bulk memory copies. @@ -165,7 +164,7 @@ static void __init init_intel(struct cpu c->f00f_bug = 1; if ( !f00f_workaround_enabled ) { - trap_init_f00f_bug(); + trap_init_virtual_IDT(); printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); f00f_workaround_enabled = 1; } @@ -248,6 +247,12 @@ static void __init init_intel(struct cpu /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) clear_bit(X86_FEATURE_SEP, c->x86_capability); + /* + * FIXME: SEP is disabled for 4G/4G for now: + */ +#ifdef CONFIG_X86_HIGH_ENTRY + clear_bit(X86_FEATURE_SEP, c->x86_capability); +#endif /* Names for the Pentium II/Celeron processors detectable only by also checking the cache size. @@ -296,12 +301,8 @@ static void __init init_intel(struct cpu } else if (smp_num_siblings > 1 ) { index_lsb = 0; index_msb = 31; - /* - * At this point we only support two siblings per - * processor package. - */ -#define NR_SIBLINGS 2 - if (smp_num_siblings != NR_SIBLINGS) { + + if (smp_num_siblings > NR_CPUS) { printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); smp_num_siblings = 1; goto too_many_siblings; --- linux-2.6.3-rc2/arch/i386/kernel/cpu/mcheck/non-fatal.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/kernel/cpu/mcheck/non-fatal.c 2004-02-12 00:41:08.000000000 -0800 @@ -24,8 +24,6 @@ #include "mce.h" -static struct timer_list mce_timer; -static int timerset; static int firstbank; #define MCE_RATE 15*HZ /* timer rate is 15s */ @@ -35,14 +33,15 @@ static void mce_checkregs (void *info) u32 low, high; int i; - preempt_disable(); for (i=firstbank; i 1) - schedule_work (&mce_work); -#endif - mce_timer.expires = jiffies + MCE_RATE; - add_timer (&mce_timer); -} - static int __init init_nonfatal_mce_checker(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -91,17 +80,11 @@ static int __init init_nonfatal_mce_chec else firstbank = 0; - if (timerset == 0) { - /* Set the timer to check for non-fatal - errors every MCE_RATE seconds */ - init_timer (&mce_timer); - mce_timer.expires = jiffies + MCE_RATE; - mce_timer.data = 0; - mce_timer.function = &mce_timerfunc; - add_timer (&mce_timer); - timerset = 1; - printk(KERN_INFO "Machine check exception polling timer started.\n"); - } + /* + * Check for non-fatal errors every MCE_RATE s + */ + schedule_delayed_work(&mce_work, MCE_RATE); + printk(KERN_INFO "Machine check exception polling timer started.\n"); return 0; } module_init(init_nonfatal_mce_checker); --- linux-2.6.3-rc2/arch/i386/kernel/cpu/proc.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/i386/kernel/cpu/proc.c 2004-02-12 00:40:50.000000000 -0800 @@ -50,7 +50,7 @@ static int show_cpuinfo(struct seq_file NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "xstore", NULL, NULL, NULL, NULL, NULL, + NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, --- linux-2.6.3-rc2/arch/i386/kernel/doublefault.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/doublefault.c 2004-02-12 00:41:23.000000000 -0800 @@ -7,12 +7,13 @@ #include #include #include +#include #define DOUBLEFAULT_STACKSIZE (1024) static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) -#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000) +#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START)) static void doublefault_fn(void) { @@ -38,8 +39,8 @@ static void doublefault_fn(void) printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", t->eax, t->ebx, t->ecx, t->edx); - printk("esi = %08lx, edi = %08lx\n", - t->esi, t->edi); + printk("esi = %08lx, edi = %08lx, ebp = %08lx\n", + t->esi, t->edi, t->ebp); } } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/early_printk.c 2004-02-12 00:41:12.000000000 -0800 @@ -0,0 +1,2 @@ + +#include "../../x86_64/kernel/early_printk.c" --- linux-2.6.3-rc2/arch/i386/kernel/edd.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/i386/kernel/edd.c 2004-02-12 00:40:49.000000000 -0800 @@ -134,18 +134,18 @@ edd_show_host_bus(struct edd_device *ede for (i = 0; i < 4; i++) { if (isprint(info->params.host_bus_type[i])) { - p += snprintf(p, left, "%c", info->params.host_bus_type[i]); + p += scnprintf(p, left, "%c", info->params.host_bus_type[i]); } else { - p += snprintf(p, left, " "); + p += scnprintf(p, left, " "); } } if (!strncmp(info->params.host_bus_type, "ISA", 3)) { - p += snprintf(p, left, "\tbase_address: %x\n", + p += scnprintf(p, left, "\tbase_address: %x\n", info->params.interface_path.isa.base_address); } else if (!strncmp(info->params.host_bus_type, "PCIX", 4) || !strncmp(info->params.host_bus_type, "PCI", 3)) { - p += snprintf(p, left, + p += scnprintf(p, left, "\t%02x:%02x.%d channel: %u\n", info->params.interface_path.pci.bus, info->params.interface_path.pci.slot, @@ -154,12 +154,12 @@ edd_show_host_bus(struct edd_device *ede } else if (!strncmp(info->params.host_bus_type, "IBND", 4) || !strncmp(info->params.host_bus_type, "XPRS", 4) || !strncmp(info->params.host_bus_type, "HTPT", 4)) { - p += snprintf(p, left, + p += scnprintf(p, left, "\tTBD: %llx\n", info->params.interface_path.ibnd.reserved); } else { - p += snprintf(p, left, "\tunknown: %llx\n", + p += scnprintf(p, left, "\tunknown: %llx\n", info->params.interface_path.unknown.reserved); } return (p - buf); @@ -178,43 +178,43 @@ edd_show_interface(struct edd_device *ed for (i = 0; i < 8; i++) { if (isprint(info->params.interface_type[i])) { - p += snprintf(p, left, "%c", info->params.interface_type[i]); + p += scnprintf(p, left, "%c", info->params.interface_type[i]); } else { - p += snprintf(p, left, " "); + p += scnprintf(p, left, " "); } } if (!strncmp(info->params.interface_type, "ATAPI", 5)) { - p += snprintf(p, left, "\tdevice: %u lun: %u\n", + p += scnprintf(p, left, "\tdevice: %u lun: %u\n", info->params.device_path.atapi.device, info->params.device_path.atapi.lun); } else if (!strncmp(info->params.interface_type, "ATA", 3)) { - p += snprintf(p, left, "\tdevice: %u\n", + p += scnprintf(p, left, "\tdevice: %u\n", info->params.device_path.ata.device); } else if (!strncmp(info->params.interface_type, "SCSI", 4)) { - p += snprintf(p, left, "\tid: %u lun: %llu\n", + p += scnprintf(p, left, "\tid: %u lun: %llu\n", info->params.device_path.scsi.id, info->params.device_path.scsi.lun); } else if (!strncmp(info->params.interface_type, "USB", 3)) { - p += snprintf(p, left, "\tserial_number: %llx\n", + p += scnprintf(p, left, "\tserial_number: %llx\n", info->params.device_path.usb.serial_number); } else if (!strncmp(info->params.interface_type, "1394", 4)) { - p += snprintf(p, left, "\teui: %llx\n", + p += scnprintf(p, left, "\teui: %llx\n", info->params.device_path.i1394.eui); } else if (!strncmp(info->params.interface_type, "FIBRE", 5)) { - p += snprintf(p, left, "\twwid: %llx lun: %llx\n", + p += scnprintf(p, left, "\twwid: %llx lun: %llx\n", info->params.device_path.fibre.wwid, info->params.device_path.fibre.lun); } else if (!strncmp(info->params.interface_type, "I2O", 3)) { - p += snprintf(p, left, "\tidentity_tag: %llx\n", + p += scnprintf(p, left, "\tidentity_tag: %llx\n", info->params.device_path.i2o.identity_tag); } else if (!strncmp(info->params.interface_type, "RAID", 4)) { - p += snprintf(p, left, "\tidentity_tag: %x\n", + p += scnprintf(p, left, "\tidentity_tag: %x\n", info->params.device_path.raid.array_number); } else if (!strncmp(info->params.interface_type, "SATA", 4)) { - p += snprintf(p, left, "\tdevice: %u\n", + p += scnprintf(p, left, "\tdevice: %u\n", info->params.device_path.sata.device); } else { - p += snprintf(p, left, "\tunknown: %llx %llx\n", + p += scnprintf(p, left, "\tunknown: %llx %llx\n", info->params.device_path.unknown.reserved1, info->params.device_path.unknown.reserved2); } @@ -256,7 +256,7 @@ edd_show_version(struct edd_device *edev return -EINVAL; } - p += snprintf(p, left, "0x%02x\n", info->version); + p += scnprintf(p, left, "0x%02x\n", info->version); return (p - buf); } @@ -264,7 +264,7 @@ static ssize_t edd_show_disk80_sig(struct edd_device *edev, char *buf) { char *p = buf; - p += snprintf(p, left, "0x%08x\n", edd_disk80_sig); + p += scnprintf(p, left, "0x%08x\n", edd_disk80_sig); return (p - buf); } @@ -278,16 +278,16 @@ edd_show_extensions(struct edd_device *e } if (info->interface_support & EDD_EXT_FIXED_DISK_ACCESS) { - p += snprintf(p, left, "Fixed disk access\n"); + p += scnprintf(p, left, "Fixed disk access\n"); } if (info->interface_support & EDD_EXT_DEVICE_LOCKING_AND_EJECTING) { - p += snprintf(p, left, "Device locking and ejecting\n"); + p += scnprintf(p, left, "Device locking and ejecting\n"); } if (info->interface_support & EDD_EXT_ENHANCED_DISK_DRIVE_SUPPORT) { - p += snprintf(p, left, "Enhanced Disk Drive support\n"); + p += scnprintf(p, left, "Enhanced Disk Drive support\n"); } if (info->interface_support & EDD_EXT_64BIT_EXTENSIONS) { - p += snprintf(p, left, "64-bit extensions\n"); + p += scnprintf(p, left, "64-bit extensions\n"); } return (p - buf); } @@ -302,21 +302,21 @@ edd_show_info_flags(struct edd_device *e } if (info->params.info_flags & EDD_INFO_DMA_BOUNDARY_ERROR_TRANSPARENT) - p += snprintf(p, left, "DMA boundary error transparent\n"); + p += scnprintf(p, left, "DMA boundary error transparent\n"); if (info->params.info_flags & EDD_INFO_GEOMETRY_VALID) - p += snprintf(p, left, "geometry valid\n"); + p += scnprintf(p, left, "geometry valid\n"); if (info->params.info_flags & EDD_INFO_REMOVABLE) - p += snprintf(p, left, "removable\n"); + p += scnprintf(p, left, "removable\n"); if (info->params.info_flags & EDD_INFO_WRITE_VERIFY) - p += snprintf(p, left, "write verify\n"); + p += scnprintf(p, left, "write verify\n"); if (info->params.info_flags & EDD_INFO_MEDIA_CHANGE_NOTIFICATION) - p += snprintf(p, left, "media change notification\n"); + p += scnprintf(p, left, "media change notification\n"); if (info->params.info_flags & EDD_INFO_LOCKABLE) - p += snprintf(p, left, "lockable\n"); + p += scnprintf(p, left, "lockable\n"); if (info->params.info_flags & EDD_INFO_NO_MEDIA_PRESENT) - p += snprintf(p, left, "no media present\n"); + p += scnprintf(p, left, "no media present\n"); if (info->params.info_flags & EDD_INFO_USE_INT13_FN50) - p += snprintf(p, left, "use int13 fn50\n"); + p += scnprintf(p, left, "use int13 fn50\n"); return (p - buf); } @@ -329,7 +329,7 @@ edd_show_default_cylinders(struct edd_de return -EINVAL; } - p += snprintf(p, left, "0x%x\n", info->params.num_default_cylinders); + p += scnprintf(p, left, "0x%x\n", info->params.num_default_cylinders); return (p - buf); } @@ -342,7 +342,7 @@ edd_show_default_heads(struct edd_device return -EINVAL; } - p += snprintf(p, left, "0x%x\n", info->params.num_default_heads); + p += scnprintf(p, left, "0x%x\n", info->params.num_default_heads); return (p - buf); } @@ -355,7 +355,7 @@ edd_show_default_sectors_per_track(struc return -EINVAL; } - p += snprintf(p, left, "0x%x\n", info->params.sectors_per_track); + p += scnprintf(p, left, "0x%x\n", info->params.sectors_per_track); return (p - buf); } @@ -368,7 +368,7 @@ edd_show_sectors(struct edd_device *edev return -EINVAL; } - p += snprintf(p, left, "0x%llx\n", info->params.number_of_sectors); + p += scnprintf(p, left, "0x%llx\n", info->params.number_of_sectors); return (p - buf); } --- linux-2.6.3-rc2/arch/i386/kernel/entry.S 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/i386/kernel/entry.S 2004-02-12 00:41:23.000000000 -0800 @@ -43,11 +43,25 @@ #include #include #include +#include #include #include +#include #include #include #include "irq_vectors.h" + /* We do not recover from a stack overflow, but at least + * we know it happened and should be able to track it down. + */ +#ifdef CONFIG_STACK_OVERFLOW_TEST +#define STACK_OVERFLOW_TEST \ + testl $7680,%esp; \ + jnz 10f; \ + call stack_overflow; \ +10: +#else +#define STACK_OVERFLOW_TEST +#endif #define nr_syscalls ((syscall_table_size)/4) @@ -87,7 +101,102 @@ TSS_ESP0_OFFSET = (4 - 0x200) #define resume_kernel restore_all #endif -#define SAVE_ALL \ +#ifdef CONFIG_X86_HIGH_ENTRY + +#ifdef CONFIG_X86_SWITCH_PAGETABLES + +#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +/* + * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu, + * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is + * left stale, so we must check whether to repeat the real stack calculation. + */ +#define repeat_if_esp_changed \ + xorl %esp, %ebp; \ + testl $0xffffe000, %ebp; \ + jnz 0b +#else +#define repeat_if_esp_changed +#endif + +/* clobbers ebx, edx and ebp */ + +#define __SWITCH_KERNELSPACE \ + cmpl $0xff000000, %esp; \ + jb 1f; \ + \ + /* \ + * switch pagetables and load the real stack, \ + * keep the stack offset: \ + */ \ + \ + movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \ + \ + /* GET_THREAD_INFO(%ebp) intermixed */ \ +0: \ + movl %esp, %ebp; \ + movl %esp, %ebx; \ + andl $0xffffe000, %ebp; \ + andl $0x00001fff, %ebx; \ + orl TI_real_stack(%ebp), %ebx; \ + repeat_if_esp_changed; \ + \ + movl %edx, %cr3; \ + movl %ebx, %esp; \ +1: + +#endif + + +#define __SWITCH_USERSPACE \ + /* interrupted any of the user return paths? */ \ + \ + movl EIP(%esp), %eax; \ + \ + cmpl $int80_ret_start_marker, %eax; \ + jb 33f; /* nope - continue with sysexit check */\ + cmpl $int80_ret_end_marker, %eax; \ + jb 22f; /* yes - switch to virtual stack */ \ +33: \ + cmpl $sysexit_ret_start_marker, %eax; \ + jb 44f; /* nope - continue with user check */ \ + cmpl $sysexit_ret_end_marker, %eax; \ + jb 22f; /* yes - switch to virtual stack */ \ + /* return to userspace? */ \ +44: \ + movl EFLAGS(%esp),%ecx; \ + movb CS(%esp),%cl; \ + testl $(VM_MASK | 3),%ecx; \ + jz 2f; \ +22: \ + /* \ + * switch to the virtual stack, then switch to \ + * the userspace pagetables. \ + */ \ + \ + GET_THREAD_INFO(%ebp); \ + movl TI_virtual_stack(%ebp), %edx; \ + movl TI_user_pgd(%ebp), %ecx; \ + \ + movl %esp, %ebx; \ + andl $0x1fff, %ebx; \ + orl %ebx, %edx; \ +int80_ret_start_marker: \ + movl %edx, %esp; \ + movl %ecx, %cr3; \ + \ + __RESTORE_ALL; \ +int80_ret_end_marker: \ +2: + +#else /* !CONFIG_X86_HIGH_ENTRY */ + +#define __SWITCH_KERNELSPACE +#define __SWITCH_USERSPACE + +#endif + +#define __SAVE_ALL \ cld; \ pushl %es; \ pushl %ds; \ @@ -102,7 +211,7 @@ TSS_ESP0_OFFSET = (4 - 0x200) movl %edx, %ds; \ movl %edx, %es; -#define RESTORE_INT_REGS \ +#define __RESTORE_INT_REGS \ popl %ebx; \ popl %ecx; \ popl %edx; \ @@ -111,29 +220,28 @@ TSS_ESP0_OFFSET = (4 - 0x200) popl %ebp; \ popl %eax -#define RESTORE_REGS \ - RESTORE_INT_REGS; \ -1: popl %ds; \ -2: popl %es; \ +#define __RESTORE_REGS \ + __RESTORE_INT_REGS; \ +111: popl %ds; \ +222: popl %es; \ .section .fixup,"ax"; \ -3: movl $0,(%esp); \ - jmp 1b; \ -4: movl $0,(%esp); \ - jmp 2b; \ +444: movl $0,(%esp); \ + jmp 111b; \ +555: movl $0,(%esp); \ + jmp 222b; \ .previous; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,3b; \ - .long 2b,4b; \ + .long 111b,444b;\ + .long 222b,555b;\ .previous - -#define RESTORE_ALL \ - RESTORE_REGS \ +#define __RESTORE_ALL \ + __RESTORE_REGS \ addl $4, %esp; \ -1: iret; \ +333: iret; \ .section .fixup,"ax"; \ -2: sti; \ +666: sti; \ movl $(__USER_DS), %edx; \ movl %edx, %ds; \ movl %edx, %es; \ @@ -142,10 +250,19 @@ TSS_ESP0_OFFSET = (4 - 0x200) .previous; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,2b; \ + .long 333b,666b;\ .previous +#define SAVE_ALL \ + __SAVE_ALL; \ + __SWITCH_KERNELSPACE; \ + STACK_OVERFLOW_TEST; + +#define RESTORE_ALL \ + __SWITCH_USERSPACE; \ + __RESTORE_ALL; +.section .entry.text,"ax" ENTRY(lcall7) pushfl # We get a different stack layout with call @@ -163,7 +280,7 @@ do_lcall: movl %edx,EIP(%ebp) # Now we move them to their "normal" places movl %ecx,CS(%ebp) # andl $-8192, %ebp # GET_THREAD_INFO - movl TI_EXEC_DOMAIN(%ebp), %edx # Get the execution domain + movl TI_exec_domain(%ebp), %edx # Get the execution domain call *4(%edx) # Call the lcall7 handler for the domain addl $4, %esp popl %eax @@ -208,7 +325,7 @@ ENTRY(resume_userspace) cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done on # int/exception return? jne work_pending @@ -216,18 +333,18 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - cmpl $0,TI_PRE_COUNT(%ebp) # non-zero preempt_count ? + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_all need_resched: - movl TI_FLAGS(%ebp), %ecx # need_resched set ? + movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl jz restore_all testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all - movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebp) + movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp) sti call schedule - movl $0,TI_PRE_COUNT(%ebp) + movl $0,TI_preempt_count(%ebp) cli jmp need_resched #endif @@ -246,37 +363,50 @@ sysenter_past_esp: pushl $(__USER_CS) pushl $SYSENTER_RETURN -/* - * Load the potential sixth argument from user stack. - * Careful about security. - */ - cmpl $__PAGE_OFFSET-3,%ebp - jae syscall_fault -1: movl (%ebp),%ebp -.section __ex_table,"a" - .align 4 - .long 1b,syscall_fault -.previous - pushl %eax SAVE_ALL GET_THREAD_INFO(%ebp) cmpl $(nr_syscalls), %eax jae syscall_badsys - testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) + testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) jnz syscall_trace_entry call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) cli - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work + +#ifdef CONFIG_X86_SWITCH_PAGETABLES + + GET_THREAD_INFO(%ebp) + movl TI_virtual_stack(%ebp), %edx + movl TI_user_pgd(%ebp), %ecx + movl %esp, %ebx + andl $0x1fff, %ebx + orl %ebx, %edx +sysexit_ret_start_marker: + movl %edx, %esp + movl %ecx, %cr3 +#endif + /* + * only ebx is not restored by the userspace sysenter vsyscall + * code, it assumes it to be callee-saved. + */ + movl EBX(%esp), %ebx + /* if something modifies registers it must also disable sysexit */ + movl EIP(%esp), %edx movl OLDESP(%esp), %ecx + sti sysexit +#ifdef CONFIG_X86_SWITCH_PAGETABLES +sysexit_ret_end_marker: + nop +#endif # system call handler stub @@ -287,7 +417,7 @@ ENTRY(system_call) cmpl $(nr_syscalls), %eax jae syscall_badsys # system call tracing in operation - testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) + testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) jnz syscall_trace_entry syscall_call: call *sys_call_table(,%eax,4) @@ -296,10 +426,23 @@ syscall_exit: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work restore_all: +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS + movl EFLAGS(%esp), %eax # mix EFLAGS and CS + movb CS(%esp), %al + testl $(VM_MASK | 3), %eax + jz resume_kernelX # returning to kernel or vm86-space + + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? + jz resume_kernelX + + int $3 + +resume_kernelX: +#endif RESTORE_ALL # perform work that needs to be done immediately before resumption @@ -312,7 +455,7 @@ work_resched: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? jz restore_all @@ -327,6 +470,22 @@ work_notifysig: # deal with pending s # vm86-space xorl %edx, %edx call do_notify_resume + +#if CONFIG_X86_HIGH_ENTRY + /* + * Reload db7 if necessary: + */ + movl TI_flags(%ebp), %ecx + testb $_TIF_DB7, %cl + jnz work_db7 + + jmp restore_all + +work_db7: + movl TI_task(%ebp), %edx; + movl task_thread_db7(%edx), %edx; + movl %edx, %db7; +#endif jmp restore_all ALIGN @@ -382,7 +541,7 @@ syscall_badsys: */ .data ENTRY(interrupt) -.text +.previous vector=0 ENTRY(irq_entries_start) @@ -392,7 +551,7 @@ ENTRY(irq_entries_start) jmp common_interrupt .data .long 1b -.text +.previous vector=vector+1 .endr @@ -433,12 +592,17 @@ error_code: movl ES(%esp), %edi # get the function address movl %eax, ORIG_EAX(%esp) movl %ecx, ES(%esp) - movl %esp, %edx pushl %esi # push the error code - pushl %edx # push the pt_regs pointer movl $(__USER_DS), %edx movl %edx, %ds movl %edx, %es + +/* clobbers edx, ebx and ebp */ + __SWITCH_KERNELSPACE + + leal 4(%esp), %edx # prepare pt_regs + pushl %edx # push pt_regs + call *%edi addl $8, %esp jmp ret_from_exception @@ -515,8 +679,8 @@ ENTRY(nmi) /* Do not access memory above the end of our stack page, * it might not exist. */ - andl $0x1fff,%eax - cmpl $0x1fec,%eax + andl $(THREAD_SIZE-1),%eax + cmpl $(THREAD_SIZE-20),%eax popl %eax jae nmi_stack_correct cmpl $sysenter_entry,12(%esp) @@ -529,7 +693,7 @@ nmi_stack_correct: pushl %edx call do_nmi addl $8, %esp - RESTORE_ALL + jmp restore_all nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) @@ -606,6 +770,8 @@ ENTRY(spurious_interrupt_bug) pushl $do_spurious_interrupt_bug jmp error_code +.previous + .data ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ @@ -882,5 +1048,8 @@ ENTRY(sys_call_table) .long sys_utimes .long sys_fadvise64_64 .long sys_ni_syscall /* sys_vserver */ + .long sys_ni_syscall /* mbind */ + .long sys_ni_syscall /* get_mempolicy */ + .long sys_ni_syscall /* set_mempolicy */ syscall_table_size=(.-sys_call_table) --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/entry_trampoline.c 2004-02-12 00:41:23.000000000 -0800 @@ -0,0 +1,75 @@ +/* + * linux/arch/i386/kernel/entry_trampoline.c + * + * (C) Copyright 2003 Ingo Molnar + * + * This file contains the needed support code for 4GB userspace + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text; + +void __init init_entry_mappings(void) +{ +#ifdef CONFIG_X86_HIGH_ENTRY + void *tramp; + + /* + * We need a high IDT and GDT for the 4G/4G split: + */ + trap_init_virtual_IDT(); + + __set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL); + __set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL); + tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0); + + printk("mapped 4G/4G trampoline to %p.\n", tramp); + BUG_ON((void *)&__start___entry_text != tramp); + /* + * Virtual kernel stack: + */ + BUG_ON(__kmap_atomic_vaddr(KM_VSTACK0) & 8191); + BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE); + BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE); + + /* + * set up the initial thread's virtual stack related + * fields: + */ + current->thread.stack_page0 = virt_to_page((char *)current->thread_info); + current->thread.stack_page1 = virt_to_page((char *)current->thread_info + PAGE_SIZE); + current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK0); + + __kunmap_atomic_type(KM_VSTACK0); + __kunmap_atomic_type(KM_VSTACK1); + __kmap_atomic(current->thread.stack_page0, KM_VSTACK0); + __kmap_atomic(current->thread.stack_page1, KM_VSTACK1); + +#endif + printk("current: %p\n", current); + printk("current->thread_info: %p\n", current->thread_info); + current->thread_info->real_stack = (void *)current->thread_info; + current->thread_info->user_pgd = NULL; + current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE; +} + + + +void __init entry_trampoline_setup(void) +{ + /* + * old IRQ entries set up by the boot code will still hang + * around - they are a sign of hw trouble anyway, now they'll + * produce a double fault message. + */ + trap_init_virtual_GDT(); +} --- linux-2.6.3-rc2/arch/i386/kernel/head.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/head.S 2004-02-12 00:41:23.000000000 -0800 @@ -16,6 +16,8 @@ #include #include #include +#include +#include #define OLD_CL_MAGIC_ADDR 0x90020 #define OLD_CL_MAGIC 0xA33F @@ -325,12 +327,12 @@ rp_sidt: ret ENTRY(stack_start) - .long init_thread_union+8192 + .long init_thread_union+THREAD_SIZE .long __BOOT_DS /* This is the default interrupt "handler" :-) */ int_msg: - .asciz "Unknown interrupt\n" + .asciz "Unknown interrupt or fault at EIP %p %p %p\n" ALIGN ignore_int: cld @@ -342,9 +344,17 @@ ignore_int: movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es + pushl 16(%esp) + pushl 24(%esp) + pushl 32(%esp) + pushl 40(%esp) pushl $int_msg call printk popl %eax + popl %eax + popl %eax + popl %eax + popl %eax popl %ds popl %es popl %edx @@ -377,23 +387,27 @@ cpu_gdt_descr: .fill NR_CPUS-1,8,0 # space for the other GDT descriptors /* - * This is initialized to create an identity-mapping at 0-8M (for bootup - * purposes) and another mapping of the 0-8M area at virtual address + * This is initialized to create an identity-mapping at 0-16M (for bootup + * purposes) and another mapping of the 0-16M area at virtual address * PAGE_OFFSET. */ .org 0x1000 ENTRY(swapper_pg_dir) .long 0x00102007 .long 0x00103007 - .fill BOOT_USER_PGD_PTRS-2,4,0 - /* default: 766 entries */ + .long 0x00104007 + .long 0x00105007 + .fill BOOT_USER_PGD_PTRS-4,4,0 + /* default: 764 entries */ .long 0x00102007 .long 0x00103007 - /* default: 254 entries */ - .fill BOOT_KERNEL_PGD_PTRS-2,4,0 + .long 0x00104007 + .long 0x00105007 + /* default: 252 entries */ + .fill BOOT_KERNEL_PGD_PTRS-4,4,0 /* - * The page tables are initialized to only 8MB here - the final page + * The page tables are initialized to only 16MB here - the final page * tables are set up later depending on memory size. */ .org 0x2000 @@ -402,15 +416,21 @@ ENTRY(pg0) .org 0x3000 ENTRY(pg1) +.org 0x4000 +ENTRY(pg2) + +.org 0x5000 +ENTRY(pg3) + /* * empty_zero_page must immediately follow the page tables ! (The * initialization loop counts until empty_zero_page) */ -.org 0x4000 +.org 0x6000 ENTRY(empty_zero_page) -.org 0x5000 +.org 0x7000 /* * Real beginning of normal "text" segment @@ -419,12 +439,12 @@ ENTRY(stext) ENTRY(_stext) /* - * This starts the data section. Note that the above is all - * in the text section because it has alignment requirements - * that we cannot fulfill any other way. + * This starts the data section. */ .data +.align PAGE_SIZE_asm + /* * The Global Descriptor Table contains 28 quadwords, per-CPU. */ @@ -439,7 +459,9 @@ ENTRY(boot_gdt_table) .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ #endif - .align L1_CACHE_BYTES + +.align PAGE_SIZE_asm + ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ --- linux-2.6.3-rc2/arch/i386/kernel/i386_ksyms.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/i386/kernel/i386_ksyms.c 2004-02-12 00:41:23.000000000 -0800 @@ -97,7 +97,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter EXPORT_SYMBOL_NOVERS(__down_failed_trylock); EXPORT_SYMBOL_NOVERS(__up_wakeup); /* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy_generic); /* Delay loops */ EXPORT_SYMBOL(__ndelay); EXPORT_SYMBOL(__udelay); @@ -111,13 +110,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); +#if !defined(CONFIG_X86_UACCESS_INDIRECT) EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__direct_strncpy_from_user); EXPORT_SYMBOL(clear_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_from_user_ll); EXPORT_SYMBOL(__copy_to_user_ll); EXPORT_SYMBOL(strnlen_user); +#else /* CONFIG_X86_UACCESS_INDIRECT */ +EXPORT_SYMBOL(direct_csum_partial_copy_generic); +#endif EXPORT_SYMBOL(dma_alloc_coherent); EXPORT_SYMBOL(dma_free_coherent); --- linux-2.6.3-rc2/arch/i386/kernel/i387.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/i386/kernel/i387.c 2004-02-12 00:41:23.000000000 -0800 @@ -218,6 +218,7 @@ void set_fpu_mxcsr( struct task_struct * static int convert_fxsr_to_user( struct _fpstate __user *buf, struct i387_fxsave_struct *fxsave ) { + struct _fpreg tmp[8]; /* 80 bytes scratch area */ unsigned long env[7]; struct _fpreg __user *to; struct _fpxreg *from; @@ -234,23 +235,25 @@ static int convert_fxsr_to_user( struct if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) return 1; - to = &buf->_st[0]; + to = tmp; from = (struct _fpxreg *) &fxsave->st_space[0]; for ( i = 0 ; i < 8 ; i++, to++, from++ ) { unsigned long *t = (unsigned long *)to; unsigned long *f = (unsigned long *)from; - if (__put_user(*f, t) || - __put_user(*(f + 1), t + 1) || - __put_user(from->exponent, &to->exponent)) - return 1; + *t = *f; + *(t + 1) = *(f+1); + to->exponent = from->exponent; } + if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8]))) + return 1; return 0; } static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, struct _fpstate __user *buf ) { + struct _fpreg tmp[8]; /* 80 bytes scratch area */ unsigned long env[7]; struct _fpxreg *to; struct _fpreg __user *from; @@ -258,6 +261,8 @@ static int convert_fxsr_from_user( struc if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) return 1; + if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8]))) + return 1; fxsave->cwd = (unsigned short)(env[0] & 0xffff); fxsave->swd = (unsigned short)(env[1] & 0xffff); @@ -269,15 +274,14 @@ static int convert_fxsr_from_user( struc fxsave->fos = env[6]; to = (struct _fpxreg *) &fxsave->st_space[0]; - from = &buf->_st[0]; + from = tmp; for ( i = 0 ; i < 8 ; i++, to++, from++ ) { unsigned long *t = (unsigned long *)to; unsigned long *f = (unsigned long *)from; - if (__get_user(*t, f) || - __get_user(*(t + 1), f + 1) || - __get_user(to->exponent, &from->exponent)) - return 1; + *t = *f; + *(t + 1) = *(f + 1); + to->exponent = from->exponent; } return 0; } --- linux-2.6.3-rc2/arch/i386/kernel/init_task.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/init_task.c 2004-02-12 00:41:23.000000000 -0800 @@ -26,7 +26,7 @@ EXPORT_SYMBOL(init_mm); */ union thread_union init_thread_union __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; + { INIT_THREAD_INFO(init_task, init_thread_union) }; /* * Initial task structure. @@ -44,5 +44,5 @@ EXPORT_SYMBOL(init_task); * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; +struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS }; --- linux-2.6.3-rc2/arch/i386/kernel/io_apic.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/kernel/io_apic.c 2004-02-12 00:41:08.000000000 -0800 @@ -317,8 +317,7 @@ struct irq_cpu_info { #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) -#define CPU_TO_PACKAGEINDEX(i) \ - ((physical_balance && i > cpu_sibling_map[i]) ? cpu_sibling_map[i] : i) +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) @@ -401,6 +400,7 @@ static void do_irq_balance(void) unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); unsigned long move_this_load = 0; int max_loaded = 0, min_loaded = 0; + int load; unsigned long useful_load_threshold = balanced_irq_interval + 10; int selected_irq; int tmp_loaded, first_attempt = 1; @@ -452,7 +452,7 @@ static void do_irq_balance(void) for (i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) continue; - if (physical_balance && i > cpu_sibling_map[i]) + if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (min_cpu_irq > CPU_IRQ(i)) { min_cpu_irq = CPU_IRQ(i); @@ -471,7 +471,7 @@ tryanothercpu: for (i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) continue; - if (physical_balance && i > cpu_sibling_map[i]) + if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (max_cpu_irq <= CPU_IRQ(i)) continue; @@ -551,9 +551,14 @@ tryanotherirq: * We seek the least loaded sibling by making the comparison * (A+B)/2 vs B */ - if (physical_balance && (CPU_IRQ(min_loaded) >> 1) > - CPU_IRQ(cpu_sibling_map[min_loaded])) - min_loaded = cpu_sibling_map[min_loaded]; + load = CPU_IRQ(min_loaded) >> 1; + for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { + if (load > CPU_IRQ(j)) { + /* This won't change cpu_sibling_map[min_loaded] */ + load = CPU_IRQ(j); + min_loaded = j; + } + } cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]); target_cpu_mask = cpumask_of_cpu(min_loaded); @@ -2150,6 +2155,10 @@ static inline void check_timer(void) { int pin1, pin2; int vector; + unsigned int ver; + + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); /* * get/set the timer IRQ vector: @@ -2163,11 +2172,17 @@ static inline void check_timer(void) * mode for the 8259A whenever interrupts are routed * through I/O APICs. Also IRQ0 has to be enabled in * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. + * disabled in the local APIC. Finally timer interrupts + * need to be acknowledged manually in the 8259A for + * do_slow_timeoffset() and for the i82489DX when using + * the NMI watchdog. */ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - timer_ack = 1; + if (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)) + timer_ack = 1; + else + timer_ack = !cpu_has_tsc; enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); @@ -2185,7 +2200,8 @@ static inline void check_timer(void) disable_8259A_irq(0); setup_nmi(); enable_8259A_irq(0); - check_nmi_watchdog(); + if (check_nmi_watchdog() < 0); + timer_ack = !cpu_has_tsc; } return; } @@ -2208,7 +2224,8 @@ static inline void check_timer(void) add_pin_to_irq(0, 0, pin2); if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); - check_nmi_watchdog(); + if (check_nmi_watchdog() < 0); + timer_ack = !cpu_has_tsc; } return; } --- linux-2.6.3-rc2/arch/i386/kernel/irq.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/irq.c 2004-02-12 00:41:17.000000000 -0800 @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include #include @@ -435,7 +437,7 @@ asmlinkage unsigned int do_IRQ(struct pt long esp; __asm__ __volatile__("andl %%esp,%0" : - "=r" (esp) : "0" (8191)); + "=r" (esp) : "0" (THREAD_SIZE - 1)); if (unlikely(esp < (sizeof(struct thread_info) + 1024))) { printk("do_IRQ: stack overflow: %ld\n", esp - sizeof(struct thread_info)); @@ -508,6 +510,8 @@ out: irq_exit(); + kgdb_process_breakpoint(); + return 1; } @@ -927,7 +931,7 @@ cpumask_t irq_affinity[NR_IRQS] = { [0 . static int irq_affinity_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -962,13 +966,77 @@ static int irq_affinity_write_proc(struc return full_count; } +#endif + +#ifdef CONFIG_HOTPLUG_CPU +#include + +static void migrate_irqs_from(int cpu) +{ + cpumask_t mask; + unsigned int irq; + + mask = cpumask_of_cpu(cpu); + cpus_complement(mask); + cpus_and(mask, mask, cpu_online_map); + for (irq = 0; irq < NR_IRQS; irq++) { + cpus_and(irq_affinity[irq], irq_affinity[irq], mask); + if (cpus_empty(irq_affinity[irq])) + irq_affinity[irq] = cpumask_of_cpu(0); + if (irq_desc[irq].handler->set_affinity) + irq_desc[irq].handler->set_affinity(irq, mask); + } +} + +void enable_all_irqs(int cpu) +{ + cpumask_t mask; + unsigned int irq; + + mask = cpumask_of_cpu(cpu); + cpus_or(mask, mask, cpu_online_map); + for (irq = 0; irq < NR_IRQS; irq++) { + cpus_or(irq_affinity[irq], irq_affinity[irq], mask); + if (cpus_empty(irq_affinity[irq])) { + irq_affinity[irq] = cpumask_of_cpu(0); + } + + if (irq_desc[irq].handler->set_affinity) + irq_desc[irq].handler->set_affinity(irq, mask); + } +} + +static int irqs_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (int)hcpu; + switch(action) { + case CPU_ONLINE: + /* + * We could go through all the irqs and add + * this processor to the cpu set - zwane + */ + enable_all_irqs(cpu); + break; + case CPU_OFFLINE: + migrate_irqs_from(cpu); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __devinitdata irqs_cpu_nb = { + .notifier_call = irqs_cpu_notify, +}; #endif static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -1051,5 +1119,9 @@ void init_irq_proc (void) */ for (i = 0; i < NR_IRQS; i++) register_irq_proc(i); +#ifdef CONFIG_HOTPLUG_CPU + register_cpu_notifier(&irqs_cpu_nb); +#endif + } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/kgdb_stub.c 2004-02-12 00:39:47.000000000 -0800 @@ -0,0 +1,2457 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (c) 2000 VERITAS Software Corporation. + * + */ +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * Updated by: David Grothe + * Updated by: Robert Walsh + * Updated by: wangdi + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Compatibility with 2.1.xx kernel by David Grothe + * + * Changes to allow auto initilization. All that is needed is that it + * be linked with the kernel and a break point (int 3) be executed. + * The header file defines BREAKPOINT to allow one to do + * this. It should also be possible, once the interrupt system is up, to + * call putDebugChar("+"). Once this is done, the remote debugger should + * get our attention by sending a ^C in a packet. George Anzinger + * + * Integrated into 2.2.5 kernel by Tigran Aivazian + * Added thread support, support for multiple processors, + * support for ia-32(x86) hardware debugging. + * Amit S. Kale ( akale@veritas.com ) + * + * Modified to support debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing an int 3. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? SNN (signal NN) + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $#. + * + * where + * :: + * :: < two hex digits computed as modulo 256 sum of > + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer. + * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ +#define KGDB_VERSION "<20030915.1651.33>" +#include +#include +#include /* for strcpy */ +#include +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include +#include +#include + +/************************************************************************ + * + * external low-level support routines + */ +typedef void (*Function) (void); /* pointer to a function */ + +/* Thread reference */ +typedef unsigned char threadref[8]; + +extern int tty_putDebugChar(int); /* write a single character */ +extern int tty_getDebugChar(void); /* read and return a single char */ +extern void tty_flushDebugChar(void); /* flush pending characters */ +extern int eth_putDebugChar(int); /* write a single character */ +extern int eth_getDebugChar(void); /* read and return a single char */ +extern void eth_flushDebugChar(void); /* flush pending characters */ + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 400 + +char *kgdb_version = KGDB_VERSION; + +/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ +int debug_regs = 0; /* set to non-zero to print registers */ + +/* filled in by an external module */ +char *gdb_module_offsets; + +static const char hexchars[] = "0123456789abcdef"; + +/* Number of bytes of registers. */ +#define NUMREGBYTES 64 +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + */ +enum regnames { _EAX, /* 0 */ + _ECX, /* 1 */ + _EDX, /* 2 */ + _EBX, /* 3 */ + _ESP, /* 4 */ + _EBP, /* 5 */ + _ESI, /* 6 */ + _EDI, /* 7 */ + _PC /* 8 also known as eip */ , + _PS /* 9 also known as eflags */ , + _CS, /* 10 */ + _SS, /* 11 */ + _DS, /* 12 */ + _ES, /* 13 */ + _FS, /* 14 */ + _GS /* 15 */ +}; + +/*************************** ASSEMBLY CODE MACROS *************************/ +/* + * Put the error code here just in case the user cares. + * Likewise, the vector number here (since GDB only gets the signal + * number through the usual means, and that's not very specific). + * The called_from is the return address so he can tell how we entered kgdb. + * This will allow him to seperate out the various possible entries. + */ +#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ + +#define PID_MAX PID_MAX_DEFAULT + +#ifdef CONFIG_SMP +void smp_send_nmi_allbutself(void); +#define IF_SMP(x) x +#undef MAX_NO_CPUS +#ifndef CONFIG_NO_KGDB_CPUS +#define CONFIG_NO_KGDB_CPUS 2 +#endif +#if CONFIG_NO_KGDB_CPUS > NR_CPUS +#define MAX_NO_CPUS NR_CPUS +#else +#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS +#endif +#define hold_init hold_on_sstep: 1, +#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) +#define NUM_CPUS num_online_cpus() +#else +#define IF_SMP(x) +#define hold_init +#undef MAX_NO_CPUS +#define MAX_NO_CPUS 1 +#define NUM_CPUS 1 +#endif +#define NOCPU (struct task_struct *)0xbad1fbad +/* *INDENT-OFF* */ +struct kgdb_info { + int used_malloc; + void *called_from; + long long entry_tsc; + int errcode; + int vector; + int print_debug_info; +#ifdef CONFIG_SMP + int hold_on_sstep; + struct { + volatile struct task_struct *task; + int pid; + int hold; + struct pt_regs *regs; + } cpus_waiting[MAX_NO_CPUS]; +#endif +} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; + +/* *INDENT-ON* */ + +#define used_m kgdb_info.used_malloc +/* + * This is little area we set aside to contain the stack we + * need to build to allow gdb to call functions. We use one + * per cpu to avoid locking issues. We will do all this work + * with interrupts off so that should take care of the protection + * issues. + */ +#define LOOKASIDE_SIZE 200 /* should be more than enough */ +#define MALLOC_MAX 200 /* Max malloc size */ +struct { + unsigned int esp; + int array[LOOKASIDE_SIZE]; +} fn_call_lookaside[MAX_NO_CPUS]; + +static int trap_cpu; +static unsigned int OLD_esp; + +#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] +#define IF_BIT 0x200 +#define TF_BIT 0x100 + +#define MALLOC_ROUND 8-1 + +static char malloc_array[MALLOC_MAX]; +IF_SMP(static void to_gdb(const char *mess)); +void * +malloc(int size) +{ + + if (size <= (MALLOC_MAX - used_m)) { + int old_used = used_m; + used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); + return &malloc_array[old_used]; + } else { + return NULL; + } +} + +/* + * I/O dispatch functions... + * Based upon kgdboe, either call the ethernet + * handler or the serial one.. + */ +void +putDebugChar(int c) +{ + if (!kgdboe) { + tty_putDebugChar(c); + } else { + eth_putDebugChar(c); + } +} + +int +getDebugChar(void) +{ + if (!kgdboe) { + return tty_getDebugChar(); + } else { + return eth_getDebugChar(); + } +} + +void +flushDebugChar(void) +{ + if (!kgdboe) { + tty_flushDebugChar(); + } else { + eth_flushDebugChar(); + } +} + +/* + * Gdb calls functions by pushing agruments, including a return address + * on the stack and the adjusting EIP to point to the function. The + * whole assumption in GDB is that we are on a different stack than the + * one the "user" i.e. code that hit the break point, is on. This, of + * course is not true in the kernel. Thus various dodges are needed to + * do the call without directly messing with EIP (which we can not change + * as it is just a location and not a register. To adjust it would then + * require that we move every thing below EIP up or down as needed. This + * will not work as we may well have stack relative pointer on the stack + * (such as the pointer to regs, for example). + + * So here is what we do: + * We detect gdb attempting to store into the stack area and instead, store + * into the fn_call_lookaside.array at the same relative location as if it + * were the area ESP pointed at. We also trap ESP modifications + * and uses these to adjust fn_call_lookaside.esp. On entry + * fn_call_lookaside.esp will be set to point at the last entry in + * fn_call_lookaside.array. This allows us to check if it has changed, and + * if so, on exit, we add the registers we will use to do the move and a + * trap/ interrupt return exit sequence. We then adjust the eflags in the + * regs array (remember we now have a copy in the fn_call_lookaside.array) to + * kill the interrupt bit, AND we change EIP to point at our set up stub. + * As part of the register set up we preset the registers to point at the + * begining and end of the fn_call_lookaside.array, so all the stub needs to + * do is move words from the array to the stack until ESP= the desired value + * then do the rti. This will then transfer to the desired function with + * all the correct registers. Nifty huh? + */ +extern asmlinkage void fn_call_stub(void); +extern asmlinkage void fn_rtn_stub(void); +/* *INDENT-OFF* */ +__asm__("fn_rtn_stub:\n\t" + "movl %eax,%esp\n\t" + "fn_call_stub:\n\t" + "1:\n\t" + "addl $-4,%ebx\n\t" + "movl (%ebx), %eax\n\t" + "pushl %eax\n\t" + "cmpl %esp,%ecx\n\t" + "jne 1b\n\t" + "popl %eax\n\t" + "popl %ebx\n\t" + "popl %ecx\n\t" + "iret \n\t"); +/* *INDENT-ON* */ +#define gdb_i386vector kgdb_info.vector +#define gdb_i386errcode kgdb_info.errcode +#define waiting_cpus kgdb_info.cpus_waiting +#define remote_debug kgdb_info.print_debug_info +#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold +/* gdb locks */ + +#ifdef CONFIG_SMP +static int in_kgdb_called; +static spinlock_t waitlocks[MAX_NO_CPUS] = + {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; +/* + * The following array has the thread pointer of each of the "other" + * cpus. We make it global so it can be seen by gdb. + */ +volatile int in_kgdb_entry_log[MAX_NO_CPUS]; +volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; +/* +static spinlock_t continuelocks[MAX_NO_CPUS]; +*/ +spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; +/* waiters on our spinlock plus us */ +static atomic_t spinlock_waiters = ATOMIC_INIT(1); +static int spinlock_count = 0; +static int spinlock_cpu = 0; +/* + * Note we use nested spin locks to account for the case where a break + * point is encountered when calling a function by user direction from + * kgdb. Also there is the memory exception recursion to account for. + * Well, yes, but this lets other cpus thru too. Lets add a + * cpu id to the lock. + */ +#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ + spinlock_cpu != smp_processor_id()){\ + atomic_inc(&spinlock_waiters); \ + while (! spin_trylock(x)) {\ + in_kgdb(®s);\ + }\ + atomic_dec(&spinlock_waiters); \ + spinlock_count = 1; \ + spinlock_cpu = smp_processor_id(); \ + }else{ \ + spinlock_count++; \ + } +#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) +#else +unsigned kgdb_spinlock = 0; +#define KGDB_SPIN_LOCK(x) --*x +#define KGDB_SPIN_UNLOCK(x) ++*x +#endif + +int +hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return (ch - 'a' + 10); + if ((ch >= '0') && (ch <= '9')) + return (ch - '0'); + if ((ch >= 'A') && (ch <= 'F')) + return (ch - 'A' + 10); + return (-1); +} + +/* scan for the sequence $# */ +void +getpacket(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + char ch; + + do { + /* wait around for the start character, ignore all other characters */ + while ((ch = (getDebugChar() & 0x7f)) != '$') ; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < BUFMAX) { + ch = getDebugChar() & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(getDebugChar() & 0x7f) << 4; + xmitcsum += hex(getDebugChar() & 0x7f); + if ((remote_debug) && (checksum != xmitcsum)) { + printk + ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", + checksum, xmitcsum, buffer); + } + + if (checksum != xmitcsum) + putDebugChar('-'); /* failed checksum */ + else { + putDebugChar('+'); /* successful transfer */ + /* if a sequence char is present, reply the sequence ID */ + if (buffer[2] == ':') { + putDebugChar(buffer[0]); + putDebugChar(buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(buffer); + for (i = 3; i <= count; i++) + buffer[i - 3] = buffer[i]; + } + } + } + } while (checksum != xmitcsum); + + if (remote_debug) + printk("R:%s\n", buffer); + flushDebugChar(); +} + +/* send the packet in buffer. */ + +void +putpacket(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + /* $#. */ + + if (!kgdboe) { + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + putDebugChar(ch); + checksum += ch; + count += 1; + } + + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + + } while ((getDebugChar() & 0x7f) != '+'); + } else { + /* + * For udp, we can not transfer too much bytes once. + * We only transfer MAX_SEND_COUNT size bytes each time + */ + +#define MAX_SEND_COUNT 30 + + int send_count = 0, i = 0; + char send_buf[MAX_SEND_COUNT]; + + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + send_count = 0; + while ((ch = buffer[count])) { + if (send_count >= MAX_SEND_COUNT) { + for(i = 0; i < MAX_SEND_COUNT; i++) { + putDebugChar(send_buf[i]); + } + flushDebugChar(); + send_count = 0; + } else { + send_buf[send_count] = ch; + checksum += ch; + count ++; + send_count++; + } + } + for(i = 0; i < send_count; i++) + putDebugChar(send_buf[i]); + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + } while ((getDebugChar() & 0x7f) != '+'); + } +} + +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; +static short error; + +void +debug_error(char *format, char *parm) +{ + if (remote_debug) + printk(format, parm); +} + +static void +print_regs(struct pt_regs *regs) +{ + printk("EAX=%08lx ", regs->eax); + printk("EBX=%08lx ", regs->ebx); + printk("ECX=%08lx ", regs->ecx); + printk("EDX=%08lx ", regs->edx); + printk("\n"); + printk("ESI=%08lx ", regs->esi); + printk("EDI=%08lx ", regs->edi); + printk("EBP=%08lx ", regs->ebp); + printk("ESP=%08lx ", (long) ®s->esp); + printk("\n"); + printk(" DS=%08x ", regs->xds); + printk(" ES=%08x ", regs->xes); + printk(" SS=%08x ", __KERNEL_DS); + printk(" FL=%08lx ", regs->eflags); + printk("\n"); + printk(" CS=%08x ", regs->xcs); + printk(" IP=%08lx ", regs->eip); +#if 0 + printk(" FS=%08x ", regs->fs); + printk(" GS=%08x ", regs->gs); +#endif + printk("\n"); + +} /* print_regs */ + +#define NEW_esp fn_call_lookaside[trap_cpu].esp + +static void +regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_EAX] = regs->eax; + gdb_regs[_EBX] = regs->ebx; + gdb_regs[_ECX] = regs->ecx; + gdb_regs[_EDX] = regs->edx; + gdb_regs[_ESI] = regs->esi; + gdb_regs[_EDI] = regs->edi; + gdb_regs[_EBP] = regs->ebp; + gdb_regs[_DS] = regs->xds; + gdb_regs[_ES] = regs->xes; + gdb_regs[_PS] = regs->eflags; + gdb_regs[_CS] = regs->xcs; + gdb_regs[_PC] = regs->eip; + /* Note, as we are a debugging the kernel, we will always + * trap in kernel code, this means no priviledge change, + * and so the pt_regs structure is not completely valid. In a non + * privilege change trap, only EFLAGS, CS and EIP are put on the stack, + * SS and ESP are not stacked, this means that the last 2 elements of + * pt_regs is not valid (they would normally refer to the user stack) + * also, using regs+1 is no good because you end up will a value that is + * 2 longs (8) too high. This used to cause stepping over functions + * to fail, so my fix is to use the address of regs->esp, which + * should point at the end of the stack frame. Note I have ignored + * completely exceptions that cause an error code to be stacked, such + * as double fault. Stuart Hughes, Zentropix. + * original code: gdb_regs[_ESP] = (int) (regs + 1) ; + + * this is now done on entry and moved to OLD_esp (as well as NEW_esp). + */ + gdb_regs[_ESP] = NEW_esp; + gdb_regs[_SS] = __KERNEL_DS; + gdb_regs[_FS] = 0xFFFF; + gdb_regs[_GS] = 0xFFFF; +} /* regs_to_gdb_regs */ + +static void +gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) +{ + regs->eax = gdb_regs[_EAX]; + regs->ebx = gdb_regs[_EBX]; + regs->ecx = gdb_regs[_ECX]; + regs->edx = gdb_regs[_EDX]; + regs->esi = gdb_regs[_ESI]; + regs->edi = gdb_regs[_EDI]; + regs->ebp = gdb_regs[_EBP]; + regs->xds = gdb_regs[_DS]; + regs->xes = gdb_regs[_ES]; + regs->eflags = gdb_regs[_PS]; + regs->xcs = gdb_regs[_CS]; + regs->eip = gdb_regs[_PC]; + NEW_esp = gdb_regs[_ESP]; /* keep the value */ +#if 0 /* can't change these */ + regs->esp = gdb_regs[_ESP]; + regs->xss = gdb_regs[_SS]; + regs->fs = gdb_regs[_FS]; + regs->gs = gdb_regs[_GS]; +#endif + +} /* gdb_regs_to_regs */ +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched ((unsigned long) scheduling_functions_start_here) +#define last_sched ((unsigned long) scheduling_functions_end_here) + +int thread_list = 0; + +void +get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) +{ + unsigned long stack_page; + int count = 0; + IF_SMP(int i); + if (!p || p == current) { + regs_to_gdb_regs(gdb_regs, regs); + return; + } +#ifdef CONFIG_SMP + for (i = 0; i < MAX_NO_CPUS; i++) { + if (p == kgdb_info.cpus_waiting[i].task) { + regs_to_gdb_regs(gdb_regs, + kgdb_info.cpus_waiting[i].regs); + gdb_regs[_ESP] = + (int) &kgdb_info.cpus_waiting[i].regs->esp; + + return; + } + } +#endif + memset(gdb_regs, 0, NUMREGBYTES); + gdb_regs[_ESP] = p->thread.esp; + gdb_regs[_PC] = p->thread.eip; + gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; + gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); + gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); + +/* + * This code is to give a more informative notion of where a process + * is waiting. It is used only when the user asks for a thread info + * list. If he then switches to the thread, s/he will find the task + * is in schedule, but a back trace should show the same info we come + * up with. This code was shamelessly purloined from process.c. It was + * then enhanced to provide more registers than simply the program + * counter. + */ + + if (!thread_list) { + return; + } + + if (p->state == TASK_RUNNING) + return; + stack_page = (unsigned long) p->thread_info; + if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > 8188 + stack_page) + return; + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + do { + if (gdb_regs[_EBP] < stack_page || + gdb_regs[_EBP] > 8184 + stack_page) + return; + gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); + gdb_regs[_ESP] = gdb_regs[_EBP] + 8; + gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; + if (gdb_regs[_PC] < first_sched || gdb_regs[_PC] >= last_sched) + return; + } while (count++ < 16); + return; +} + +/* Indicate to caller of mem2hex or hex2mem that there has been an + error. */ +static volatile int mem_err = 0; +static volatile int mem_err_expected = 0; +static volatile int mem_err_cnt = 0; +static int garbage_loc = -1; + +int +get_char(char *addr) +{ + return *addr; +} + +void +set_char(char *addr, int val, int may_fault) +{ + /* + * This code traps references to the area mapped to the kernel + * stack as given by the regs and, instead, stores to the + * fn_call_lookaside[cpu].array + */ + if (may_fault && + (unsigned int) addr < OLD_esp && + ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { + addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); + } + *addr = val; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* return a pointer to the last char put in buf (null) */ +/* If MAY_FAULT is non-zero, then we should set mem_err in response to + a fault; if zero treat a fault like any other fault in the stub. */ +char * +mem2hex(char *mem, char *buf, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + /* printk("%lx = ", mem) ; */ + + ch = get_char(mem++); + + /* printk("%02x\n", ch & 0xFF) ; */ + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault fetching from addr %lx\n", + (long) (mem - 1)); + *buf = 0; /* truncate buffer */ + return (buf); + } + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (may_fault) + mem_err_expected = 0; + return (buf); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return a pointer to the character AFTER the last byte written */ +/* NOTE: We use the may fault flag to also indicate if the write is to + * the registers (0) or "other" memory (!=0) + */ +char * +hex2mem(char *buf, char *mem, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + ch = hex(*buf++) << 4; + ch = ch + hex(*buf++); + set_char(mem++, ch, may_fault); + + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault storing to addr %lx\n", + (long) (mem - 1)); + return (mem); + } + } + if (may_fault) + mem_err_expected = 0; + return (mem); +} + +/**********************************************/ +/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ +/* RETURN NUMBER OF CHARS PROCESSED */ +/**********************************************/ +int +hexToInt(char **ptr, int *intValue) +{ + int numChars = 0; + int hexValue; + + *intValue = 0; + + while (**ptr) { + hexValue = hex(**ptr); + if (hexValue >= 0) { + *intValue = (*intValue << 4) | hexValue; + numChars++; + } else + break; + + (*ptr)++; + } + + return (numChars); +} + +#define stubhex(h) hex(h) +#ifdef old_thread_list + +static int +stub_unpack_int(char *buff, int fieldlength) +{ + int nibble; + int retval = 0; + + while (fieldlength) { + nibble = stubhex(*buff++); + retval |= nibble; + fieldlength--; + if (fieldlength) + retval = retval << 4; + } + return retval; +} +#endif +static char * +pack_hex_byte(char *pkt, int byte) +{ + *pkt++ = hexchars[(byte >> 4) & 0xf]; + *pkt++ = hexchars[(byte & 0xf)]; + return pkt; +} + +#define BUF_THREAD_ID_SIZE 16 + +static char * +pack_threadid(char *pkt, threadref * id) +{ + char *limit; + unsigned char *altid; + + altid = (unsigned char *) id; + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *altid++); + return pkt; +} + +#ifdef old_thread_list +static char * +unpack_byte(char *buf, int *value) +{ + *value = stub_unpack_int(buf, 2); + return buf + 2; +} + +static char * +unpack_threadid(char *inbuf, threadref * id) +{ + char *altref; + char *limit = inbuf + BUF_THREAD_ID_SIZE; + int x, y; + + altref = (char *) id; + + while (inbuf < limit) { + x = stubhex(*inbuf++); + y = stubhex(*inbuf++); + *altref++ = (x << 4) | y; + } + return inbuf; +} +#endif +void +int_to_threadref(threadref * id, int value) +{ + unsigned char *scan; + + scan = (unsigned char *) id; + { + int i = 4; + while (i--) + *scan++ = 0; + } + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} +int +int_to_hex_v(unsigned char * id, int value) +{ + unsigned char *start = id; + int shift; + int ch; + + for (shift = 28; shift >= 0; shift -= 4) { + if ((ch = (value >> shift) & 0xf) || (id != start)) { + *id = hexchars[ch]; + id++; + } + } + if (id == start) + *id++ = '0'; + return id - start; +} +#ifdef old_thread_list + +static int +threadref_to_int(threadref * ref) +{ + int i, value = 0; + unsigned char *scan; + + scan = (char *) ref; + scan += 4; + i = 4; + while (i-- > 0) + value = (value << 8) | ((*scan++) & 0xff); + return value; +} +#endif +static int +cmp_str(char *s1, char *s2, int count) +{ + while (count--) { + if (*s1++ != *s2++) + return 0; + } + return 1; +} + +#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ +extern struct task_struct *kgdb_get_idle(int cpu); +#define idle_task(cpu) kgdb_get_idle(cpu) +#else +#define idle_task(cpu) init_tasks[cpu] +#endif + +extern int kgdb_pid_init_done; + +struct task_struct * +getthread(int pid) +{ + struct task_struct *thread; + if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { + + return idle_task(pid - PID_MAX); + } else { + /* + * find_task_by_pid is relatively safe all the time + * Other pid functions require lock downs which imply + * that we may be interrupting them (as we get here + * in the middle of most any lock down). + * Still we don't want to call until the table exists! + */ + if (kgdb_pid_init_done){ + thread = find_task_by_pid(pid); + if (thread) { + return thread; + } + } + } + return NULL; +} +/* *INDENT-OFF* */ +struct hw_breakpoint { + unsigned enabled; + unsigned type; + unsigned len; + unsigned addr; +} breakinfo[4] = { {enabled:0}, + {enabled:0}, + {enabled:0}, + {enabled:0}}; +/* *INDENT-ON* */ +unsigned hw_breakpoint_status; +void +correct_hw_break(void) +{ + int breakno; + int correctit; + int breakbit; + unsigned dr7; + + asm volatile ("movl %%db7, %0\n":"=r" (dr7) + :); + /* *INDENT-OFF* */ + do { + unsigned addr0, addr1, addr2, addr3; + asm volatile ("movl %%db0, %0\n" + "movl %%db1, %1\n" + "movl %%db2, %2\n" + "movl %%db3, %3\n" + :"=r" (addr0), "=r"(addr1), + "=r"(addr2), "=r"(addr3) + :); + } while (0); + /* *INDENT-ON* */ + correctit = 0; + for (breakno = 0; breakno < 3; breakno++) { + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + dr7 |= (((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << 16) << + (breakno << 2); + switch (breakno) { + case 0: + asm volatile ("movl %0, %%dr0\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 1: + asm volatile ("movl %0, %%dr1\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 2: + asm volatile ("movl %0, %%dr2\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 3: + asm volatile ("movl %0, %%dr3\n"::"r" + (breakinfo[breakno].addr)); + break; + } + } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + } + } + if (correctit) { + asm volatile ("movl %0, %%db7\n"::"r" (dr7)); + } +} + +int +remove_hw_break(unsigned breakno) +{ + if (!breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 0; + return 0; +} + +int +set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) +{ + if (breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 1; + breakinfo[breakno].type = type; + breakinfo[breakno].len = len; + breakinfo[breakno].addr = addr; + return 0; +} + +#ifdef CONFIG_SMP +static int in_kgdb_console = 0; + +int +in_kgdb(struct pt_regs *regs) +{ + unsigned flags; + int cpu = smp_processor_id(); + in_kgdb_called = 1; + if (!spin_is_locked(&kgdb_spinlock)) { + if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ + in_kgdb_console) { /* or we are doing slow i/o */ + return 1; + } + return 0; + } + + /* As I see it the only reason not to let all cpus spin on + * the same spin_lock is to allow selected ones to proceed. + * This would be a good thing, so we leave it this way. + * Maybe someday.... Done ! + + * in_kgdb() is called from an NMI so we don't pretend + * to have any resources, like printk() for example. + */ + + kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ + /* + * log arival of this cpu + * The NMI keeps on ticking. Protect against recurring more + * than once, and ignor the cpu that has the kgdb lock + */ + in_kgdb_entry_log[cpu]++; + in_kgdb_here_log[cpu] = regs; + if (cpu == spinlock_cpu || waiting_cpus[cpu].task) + goto exit_in_kgdb; + + /* + * For protection of the initilization of the spin locks by kgdb + * it locks the kgdb spinlock before it gets the wait locks set + * up. We wait here for the wait lock to be taken. If the + * kgdb lock goes away first?? Well, it could be a slow exit + * sequence where the wait lock is removed prior to the kgdb lock + * so if kgdb gets unlocked, we just exit. + */ + + while (spin_is_locked(&kgdb_spinlock) && + !spin_is_locked(waitlocks + cpu)) ; + if (!spin_is_locked(&kgdb_spinlock)) + goto exit_in_kgdb; + + waiting_cpus[cpu].task = current; + waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu); + waiting_cpus[cpu].regs = regs; + + spin_unlock_wait(waitlocks + cpu); + + /* + * log departure of this cpu + */ + waiting_cpus[cpu].task = 0; + waiting_cpus[cpu].pid = 0; + waiting_cpus[cpu].regs = 0; + correct_hw_break(); + exit_in_kgdb: + in_kgdb_here_log[cpu] = 0; + kgdb_local_irq_restore(flags); + return 1; + /* + spin_unlock(continuelocks + smp_processor_id()); + */ +} + +void +smp__in_kgdb(struct pt_regs regs) +{ + ack_APIC_irq(); + in_kgdb(®s); +} +#else +int +in_kgdb(struct pt_regs *regs) +{ + return (kgdb_spinlock); +} +#endif + +void +printexceptioninfo(int exceptionNo, int errorcode, char *buffer) +{ + unsigned dr6; + int i; + switch (exceptionNo) { + case 1: /* debug exception */ + break; + case 3: /* breakpoint */ + sprintf(buffer, "Software breakpoint"); + return; + default: + sprintf(buffer, "Details not available"); + return; + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (dr6 & 0x4000) { + sprintf(buffer, "Single step"); + return; + } + for (i = 0; i < 4; ++i) { + if (dr6 & (1 << i)) { + sprintf(buffer, "Hardware breakpoint %d", i); + return; + } + } + sprintf(buffer, "Unknown trap"); + return; +} + +/* + * This function does all command procesing for interfacing to gdb. + * + * NOTE: The INT nn instruction leaves the state of the interrupt + * enable flag UNCHANGED. That means that when this routine + * is entered via a breakpoint (INT 3) instruction from code + * that has interrupts enabled, then interrupts will STILL BE + * enabled when this routine is entered. The first thing that + * we do here is disable interrupts so as to prevent recursive + * entries and bothersome serial interrupts while we are + * trying to run the serial port in polled mode. + * + * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so + * it is always necessary to do a restore_flags before returning + * so as to let go of that lock. + */ +int +kgdb_handle_exception(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs) +{ + struct task_struct *usethread = NULL; + struct task_struct *thread_list_start = 0, *thread = NULL; + int addr, length; + int breakno, breaktype; + char *ptr; + int newPC; + threadref thref; + int threadid; + int thread_min = PID_MAX + MAX_NO_CPUS; +#ifdef old_thread_list + int maxthreads; +#endif + int nothreads; + unsigned long flags; + int gdb_regs[NUMREGBYTES / 4]; + int dr6; + IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ +#define NO_NMI 1 +#define NO_SYNC 2 +#define regs (*linux_regs) +#define NUMREGS NUMREGBYTES/4 + /* + * If the entry is not from the kernel then return to the Linux + * trap handler and let it process the interrupt normally. + */ + if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { + printk("ignoring non-kernel exception\n"); + print_regs(®s); + return (0); + } + /* + * If we're using eth mode, set the 'mode' in the netdevice. + */ + + if (kgdboe) + netpoll_set_trap(1); + + kgdb_local_irq_save(flags); + + /* Get kgdb spinlock */ + + KGDB_SPIN_LOCK(&kgdb_spinlock); + rdtscll(kgdb_info.entry_tsc); + /* + * We depend on this spinlock and the NMI watch dog to control the + * other cpus. They will arrive at "in_kgdb()" as a result of the + * NMI and will wait there for the following spin locks to be + * released. + */ +#ifdef CONFIG_SMP + +#if 0 + if (cpu_callout_map & ~MAX_CPU_MASK) { + printk("kgdb : too many cpus, possibly not mapped" + " in contiguous space, change MAX_NO_CPUS" + " in kgdb_stub and make new kernel.\n" + " cpu_callout_map is %lx\n", cpu_callout_map); + goto exit_just_unlock; + } +#endif + if (spinlock_count == 1) { + int time = 0, end_time, dum = 0; + int i; + int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) + }; + if (remote_debug) { + printk("kgdb : cpu %d entry, syncing others\n", + smp_processor_id()); + } + for (i = 0; i < MAX_NO_CPUS; i++) { + /* + * Use trylock as we may already hold the lock if + * we are holding the cpu. Net result is all + * locked. + */ + spin_trylock(&waitlocks[i]); + } + for (i = 0; i < MAX_NO_CPUS; i++) + cpu_logged_in[i] = 0; + /* + * Wait for their arrival. We know the watch dog is active if + * in_kgdb() has ever been called, as it is always called on a + * watchdog tick. + */ + rdtsc(dum, time); + end_time = time + 2; /* Note: we use the High order bits! */ + i = 1; + if (num_online_cpus() > 1) { + int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; + smp_send_nmi_allbutself(); + + while (i < num_online_cpus() && time != end_time) { + int j; + for (j = 0; j < MAX_NO_CPUS; j++) { + if (waiting_cpus[j].task && + waiting_cpus[j].task != NOCPU && + !cpu_logged_in[j]) { + i++; + cpu_logged_in[j] = 1; + if (remote_debug) { + printk + ("kgdb : cpu %d arrived at kgdb\n", + j); + } + break; + } else if (!waiting_cpus[j].task && + !cpu_online(j)) { + waiting_cpus[j].task = NOCPU; + cpu_logged_in[j] = 1; + waiting_cpus[j].hold = 1; + break; + } + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + + int wait = 100000; + while (wait--) ; + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + printk + ("kgdb : cpu %d stall" + " in in_kgdb\n", + j); + i++; + cpu_logged_in[j] = 1; + waiting_cpus[j].task = + (struct task_struct + *) 1; + } + } + } + + if (in_kgdb_entry_log[smp_processor_id()] > + (me_in_kgdb + 10)) { + break; + } + + rdtsc(dum, time); + } + if (i < num_online_cpus()) { + printk + ("kgdb : time out, proceeding without sync\n"); +#if 0 + printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", + waiting_cpus[0].task != 0, + waiting_cpus[1].task != 0); + printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", + cpu_logged_in[0], cpu_logged_in[1]); + printk + ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", + in_kgdb_here_log[0] != 0, + in_kgdb_here_log[1] != 0); +#endif + entry_state = NO_SYNC; + } else { +#if 0 + int ent = + in_kgdb_entry_log[smp_processor_id()] - + me_in_kgdb; + printk("kgdb : sync after %d entries\n", ent); +#endif + } + } else { + if (remote_debug) { + printk + ("kgdb : %d cpus, but watchdog not active\n" + "proceeding without locking down other cpus\n", + num_online_cpus()); + entry_state = NO_NMI; + } + } + } +#endif + + if (remote_debug) { + unsigned long *lp = (unsigned long *) &linux_regs; + + printk("handle_exception(exceptionVector=%d, " + "signo=%d, err_code=%d, linux_regs=%p)\n", + exceptionVector, signo, err_code, linux_regs); + if (debug_regs) { + print_regs(®s); + printk("Stk: %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[0], lp[1], lp[2], lp[3], + lp[4], lp[5], lp[6], lp[7]); + printk(" %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[8], lp[9], lp[10], lp[11], + lp[12], lp[13], lp[14], lp[15]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[16], lp[17], lp[18], lp[19], + lp[20], lp[21], lp[22], lp[23]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[24], lp[25], lp[26], lp[27], + lp[28], lp[29], lp[30], lp[31]); + } + } + + /* Disable hardware debugging while we are in kgdb */ + /* Get the debug register status register */ +/* *INDENT-OFF* */ + __asm__("movl %0,%%db7" + : /* no output */ + :"r"(0)); + + asm volatile ("movl %%db6, %0\n" + :"=r" (hw_breakpoint_status) + :); + +/* *INDENT-ON* */ + switch (exceptionVector) { + case 0: /* divide error */ + case 1: /* debug exception */ + case 2: /* NMI */ + case 3: /* breakpoint */ + case 4: /* overflow */ + case 5: /* bounds check */ + case 6: /* invalid opcode */ + case 7: /* device not available */ + case 8: /* double fault (errcode) */ + case 10: /* invalid TSS (errcode) */ + case 12: /* stack fault (errcode) */ + case 16: /* floating point error */ + case 17: /* alignment check (errcode) */ + default: /* any undocumented */ + break; + case 11: /* segment not present (errcode) */ + case 13: /* general protection (errcode) */ + case 14: /* page fault (special errcode) */ + case 19: /* cache flush denied */ + if (mem_err_expected) { + /* + * This fault occured because of the + * get_char or set_char routines. These + * two routines use either eax of edx to + * indirectly reference the location in + * memory that they are working with. + * For a page fault, when we return the + * instruction will be retried, so we + * have to make sure that these + * registers point to valid memory. + */ + mem_err = 1; /* set mem error flag */ + mem_err_expected = 0; + mem_err_cnt++; /* helps in debugging */ + /* make valid address */ + regs.eax = (long) &garbage_loc; + /* make valid address */ + regs.edx = (long) &garbage_loc; + if (remote_debug) + printk("Return after memory error: " + "mem_err_cnt=%d\n", mem_err_cnt); + if (debug_regs) + print_regs(®s); + goto exit_kgdb; + } + break; + } + if (remote_debug) + printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); + + gdb_i386vector = exceptionVector; + gdb_i386errcode = err_code; + kgdb_info.called_from = __builtin_return_address(0); +#ifdef CONFIG_SMP + /* + * OK, we can now communicate, lets tell gdb about the sync. + * but only if we had a problem. + */ + switch (entry_state) { + case NO_NMI: + to_gdb("NMI not active, other cpus not stopped\n"); + break; + case NO_SYNC: + to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); + default:; + } + +#endif +/* + * Set up the gdb function call area. + */ + trap_cpu = smp_processor_id(); + OLD_esp = NEW_esp = (int) (&linux_regs->esp); + + IF_SMP(once_again:) + /* reply to host that an exception has occurred */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + + putpacket(remcomOutBuffer); + + while (1 == 1) { + error = 0; + remcomOutBuffer[0] = 0; + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + break; + case 'd': + remote_debug = !(remote_debug); /* toggle debug flag */ + printk("Remote debug %s\n", + remote_debug ? "on" : "off"); + break; + case 'g': /* return the value of the CPU registers */ + get_gdb_regs(usethread, ®s, gdb_regs); + mem2hex((char *) gdb_regs, + remcomOutBuffer, NUMREGBYTES, 0); + break; + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem(&remcomInBuffer[1], + (char *) gdb_regs, NUMREGBYTES, 0); + if (!usethread || usethread == current) { + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E00"); + } + break; + + case 'P':{ /* set the value of a single CPU register - + return OK */ + /* + * For some reason, gdb wants to talk about psudo + * registers (greater than 15). These may have + * meaning for ptrace, but for us it is safe to + * ignor them. We do this by dumping them into + * _GS which we also ignor, but do have memory for. + */ + int regno; + + ptr = &remcomInBuffer[1]; + regs_to_gdb_regs(gdb_regs, ®s); + if ((!usethread || usethread == current) && + hexToInt(&ptr, ®no) && + *ptr++ == '=' && (regno >= 0)) { + regno = + (regno >= NUMREGS ? _GS : regno); + hex2mem(ptr, (char *) &gdb_regs[regno], + 4, 0); + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + break; + } + strcpy(remcomOutBuffer, "E01"); + break; + } + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr) && + (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { + ptr = 0; + /* + * hex doubles the byte count + */ + if (length > (BUFMAX / 2)) + length = BUFMAX / 2; + mem2hex((char *) addr, + remcomOutBuffer, length, 1); + if (mem_err) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } + } + + if (ptr) { + strcpy(remcomOutBuffer, "E01"); + debug_error + ("malformed read memory command: %s\n", + remcomInBuffer); + } + break; + + /* MAA..AA,LLLL: + Write LLLL bytes at address AA.AA return OK */ + case 'M': + /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr) && + (*(ptr++) == ',') && + (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { + hex2mem(ptr, (char *) addr, length, 1); + + if (mem_err) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } else { + strcpy(remcomOutBuffer, "OK"); + } + + ptr = 0; + } + if (ptr) { + strcpy(remcomOutBuffer, "E02"); + debug_error + ("malformed write memory command: %s\n", + remcomInBuffer); + } + break; + case 'S': + remcomInBuffer[0] = 's'; + case 'C': + /* Csig;AA..AA where ;AA..AA is optional + * continue with signal + * Since signals are meaning less to us, delete that + * part and then fall into the 'c' code. + */ + ptr = &remcomInBuffer[1]; + length = 2; + while (*ptr && *ptr != ';') { + length++; + ptr++; + } + if (*ptr) { + do { + ptr++; + *(ptr - length++) = *ptr; + } while (*ptr); + } else { + remcomInBuffer[1] = 0; + } + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + /* D detach, reply OK and then continue */ + case 'c': + case 's': + case 'D': + + /* try to read optional parameter, + pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) { + if (remote_debug) + printk("Changing EIP to 0x%x\n", addr); + + regs.eip = addr; + } + + newPC = regs.eip; + + /* clear the trace bit */ + regs.eflags &= 0xfffffeff; + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + regs.eflags |= 0x100; + + /* detach is a friendly version of continue. Note that + debugging is still enabled (e.g hit control C) + */ + if (remcomInBuffer[0] == 'D') { + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + } + + if (remote_debug) { + printk("Resuming execution\n"); + print_regs(®s); + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno) && + (breakinfo[breakno].type == 0)) { + /* Set restore flag */ + regs.eflags |= 0x10000; + break; + } + } + } + + if (kgdboe) + netpoll_set_trap(0); + + correct_hw_break(); + asm volatile ("movl %0, %%db6\n"::"r" (0)); + goto exit_kgdb; + + /* kill the program */ + case 'k': /* do nothing */ + break; + + /* query */ + case 'q': + nothreads = 0; + switch (remcomInBuffer[1]) { + case 'f': + threadid = 1; + thread_list = 2; + thread_list_start = (usethread ? : current); + case 's': + if (!cmp_str(&remcomInBuffer[2], + "ThreadInfo", 10)) + break; + + remcomOutBuffer[nothreads++] = 'm'; + for (; threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + nothreads += int_to_hex_v( + &remcomOutBuffer[ + nothreads], + threadid); + if (thread_min > threadid) + thread_min = threadid; + remcomOutBuffer[ + nothreads] = ','; + nothreads++; + if (nothreads > BUFMAX - 10) + break; + } + } + if (remcomOutBuffer[nothreads - 1] == 'm') { + remcomOutBuffer[nothreads - 1] = 'l'; + } else { + nothreads--; + } + remcomOutBuffer[nothreads] = 0; + break; + +#ifdef old_thread_list /* Old thread info request */ + case 'L': + /* List threads */ + thread_list = 2; + thread_list_start = (usethread ? : current); + unpack_byte(remcomInBuffer + 3, &maxthreads); + unpack_threadid(remcomInBuffer + 5, &thref); + do { + int buf_thread_limit = + (BUFMAX - 22) / BUF_THREAD_ID_SIZE; + if (maxthreads > buf_thread_limit) { + maxthreads = buf_thread_limit; + } + } while (0); + remcomOutBuffer[0] = 'q'; + remcomOutBuffer[1] = 'M'; + remcomOutBuffer[4] = '0'; + pack_threadid(remcomOutBuffer + 5, &thref); + + threadid = threadref_to_int(&thref); + for (nothreads = 0; + nothreads < maxthreads && + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(remcomOutBuffer + + 21 + + nothreads * 16, + &thref); + nothreads++; + if (thread_min > threadid) + thread_min = threadid; + } + } + + if (threadid == PID_MAX + MAX_NO_CPUS) { + remcomOutBuffer[4] = '1'; + } + pack_hex_byte(remcomOutBuffer + 2, nothreads); + remcomOutBuffer[21 + nothreads * 16] = '\0'; + break; +#endif + case 'C': + /* Current thread id */ + remcomOutBuffer[0] = 'Q'; + remcomOutBuffer[1] = 'C'; + threadid = current->pid; + if (!threadid) { + /* + * idle thread + */ + for (threadid = PID_MAX; + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + if (current == + idle_task(threadid - + PID_MAX)) + break; + } + } + int_to_threadref(&thref, threadid); + pack_threadid(remcomOutBuffer + 2, &thref); + remcomOutBuffer[18] = '\0'; + break; + + case 'E': + /* Print exception info */ + printexceptioninfo(exceptionVector, + err_code, remcomOutBuffer); + break; + case 'T':{ + char * nptr; + /* Thread extra info */ + if (!cmp_str(&remcomInBuffer[2], + "hreadExtraInfo,", 15)) { + break; + } + ptr = &remcomInBuffer[17]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + nptr = &thread->comm[0]; + length = 0; + ptr = &remcomOutBuffer[0]; + do { + length++; + ptr = pack_hex_byte(ptr, *nptr++); + } while (*nptr && length < 16); + /* + * would like that 16 to be the size of + * task_struct.comm but don't know the + * syntax.. + */ + *ptr = 0; + } + } + break; + + /* task related */ + case 'H': + switch (remcomInBuffer[1]) { + case 'g': + ptr = &remcomInBuffer[2]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (!thread) { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + break; + } + /* + * Just in case I forget what this is all about, + * the "thread info" command to gdb causes it + * to ask for a thread list. It then switches + * to each thread and asks for the registers. + * For this (and only this) usage, we want to + * fudge the registers of tasks not on the run + * list (i.e. waiting) to show the routine that + * called schedule. Also, gdb, is a minimalist + * in that if the current thread is the last + * it will not re-read the info when done. + * This means that in this case we must show + * the real registers. So here is how we do it: + * Each entry we keep track of the min + * thread in the list (the last that gdb will) + * get info for. We also keep track of the + * starting thread. + * "thread_list" is cleared when switching back + * to the min thread if it is was current, or + * if it was not current, thread_list is set + * to 1. When the switch to current comes, + * if thread_list is 1, clear it, else do + * nothing. + */ + usethread = thread; + if ((thread_list == 1) && + (thread == thread_list_start)) { + thread_list = 0; + } + if (thread_list && (threadid == thread_min)) { + if (thread == thread_list_start) { + thread_list = 0; + } else { + thread_list = 1; + } + } + /* follow through */ + case 'c': + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (thread) { + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + if (thread_min > threadid) + thread_min = threadid; + } else { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + } + break; + + case 'Y': /* set up a hardware breakpoint */ + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + ptr++; + hexToInt(&ptr, &breaktype); + ptr++; + hexToInt(&ptr, &length); + ptr++; + hexToInt(&ptr, &addr); + if (set_hw_break(breakno & 0x3, + breaktype & 0x3, + length & 0x3, addr) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + /* Remove hardware breakpoint */ + case 'y': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + if (remove_hw_break(breakno & 0x3) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + case 'r': /* reboot */ + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + /*to_gdb("Rebooting\n"); */ + /* triplefault no return from here */ + { + static long no_idt[2]; + __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); + BREAKPOINT; + } + + } /* switch */ + + /* reply to the request */ + putpacket(remcomOutBuffer); + } /* while(1==1) */ + /* + * reached by goto only. + */ + exit_kgdb: + /* + * Here is where we set up to trap a gdb function call. NEW_esp + * will be changed if we are trying to do this. We handle both + * adding and subtracting, thus allowing gdb to put grung on + * the stack which it removes later. + */ + if (NEW_esp != OLD_esp) { + int *ptr = END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) + ptr -= (OLD_esp - NEW_esp) / sizeof (int); + *--ptr = linux_regs->eflags; + *--ptr = linux_regs->xcs; + *--ptr = linux_regs->eip; + *--ptr = linux_regs->ecx; + *--ptr = linux_regs->ebx; + *--ptr = linux_regs->eax; + linux_regs->ecx = NEW_esp - (sizeof (int) * 6); + linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) { + linux_regs->eip = (unsigned int) fn_call_stub; + } else { + linux_regs->eip = (unsigned int) fn_rtn_stub; + linux_regs->eax = NEW_esp; + } + linux_regs->eflags &= ~(IF_BIT | TF_BIT); + } +#ifdef CONFIG_SMP + /* + * Release gdb wait locks + * Sanity check time. Must have at least one cpu to run. Also single + * step must not be done if the current cpu is on hold. + */ + if (spinlock_count == 1) { + int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; + int cpu_avail = 0; + int i; + + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!cpu_online(i)) + break; + if (!hold_cpu(i)) { + cpu_avail = 1; + } + } + /* + * Early in the bring up there will be NO cpus on line... + */ + if (!cpu_avail && !cpus_empty(cpu_online_map)) { + to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); + goto once_again; + } + if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { + to_gdb + ("Current cpu must be unblocked to single step\n"); + goto once_again; + } + if (!(ss_hold)) { + int i; + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!hold_cpu(i)) { + spin_unlock(&waitlocks[i]); + } + } + } else { + spin_unlock(&waitlocks[smp_processor_id()]); + } + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + /* + * If this cpu is on hold, this is where we + * do it. Note, the NMI will pull us out of here, + * but will return as the above lock is not held. + * We will stay here till another cpu releases the lock for us. + */ + spin_unlock_wait(waitlocks + smp_processor_id()); + kgdb_local_irq_restore(flags); + return (0); + } +#if 0 +exit_just_unlock: +#endif +#endif + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + kgdb_local_irq_restore(flags); + return (0); +} + +/* this function is used to set up exception handlers for tracing and + * breakpoints. + * This function is not needed as the above line does all that is needed. + * We leave it for backward compatitability... + */ +void +set_debug_traps(void) +{ + /* + * linux_debug_hook is defined in traps.c. We store a pointer + * to our own exception handler into it. + + * But really folks, every hear of labeled common, an old Fortran + * concept. Lots of folks can reference it and it is define if + * anyone does. Only one can initialize it at link time. We do + * this with the hook. See the statement above. No need for any + * executable code and it is ready as soon as the kernel is + * loaded. Very desirable in kernel debugging. + + linux_debug_hook = handle_exception ; + */ + + /* In case GDB is started before us, ack any packets (presumably + "$?#xx") sitting there. + putDebugChar ('+'); + + initialized = 1; + */ +} + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. */ +/* But really, just use the BREAKPOINT macro. We will handle the int stuff + */ + +#ifdef later +/* + * possibly we should not go thru the traps.c code at all? Someday. + */ +void +do_kgdb_int3(struct pt_regs *regs, long error_code) +{ + kgdb_handle_exception(3, 5, error_code, regs); + return; +} +#endif +#undef regs +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS +asmlinkage void +bad_sys_call_exit(int stuff) +{ + struct pt_regs *regs = (struct pt_regs *) &stuff; + printk("Sys call %d return with %x preempt_count\n", + (int) regs->orig_eax, preempt_count()); +} +#endif +#ifdef CONFIG_STACK_OVERFLOW_TEST +#include +asmlinkage void +stack_overflow(void) +{ +#ifdef BREAKPOINT + BREAKPOINT; +#else + printk("Kernel stack overflow, looping forever\n"); +#endif + while (1) { + } +} +#endif + +#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) +char gdbconbuf[BUFMAX]; + +static void +kgdb_gdb_message(const char *s, unsigned count) +{ + int i; + int wcount; + char *bufptr; + /* + * This takes care of NMI while spining out chars to gdb + */ + IF_SMP(in_kgdb_console = 1); + gdbconbuf[0] = 'O'; + bufptr = gdbconbuf + 1; + while (count > 0) { + if ((count << 1) > (BUFMAX - 2)) { + wcount = (BUFMAX - 2) >> 1; + } else { + wcount = count; + } + count -= wcount; + for (i = 0; i < wcount; i++) { + bufptr = pack_hex_byte(bufptr, s[i]); + } + *bufptr = '\0'; + s += wcount; + + putpacket(gdbconbuf); + + } + IF_SMP(in_kgdb_console = 0); +} +#endif +#ifdef CONFIG_SMP +static void +to_gdb(const char *s) +{ + int count = 0; + while (s[count] && (count++ < BUFMAX)) ; + kgdb_gdb_message(s, count); +} +#endif +#ifdef CONFIG_KGDB_CONSOLE +#include +#include +#include +#include +#include + +void +kgdb_console_write(struct console *co, const char *s, unsigned count) +{ + + if (gdb_i386vector == -1) { + /* + * We have not yet talked to gdb. What to do... + * lets break, on continue we can do the write. + * But first tell him whats up. Uh, well no can do, + * as this IS the console. Oh well... + * We do need to wait or the messages will be lost. + * Other option would be to tell the above code to + * ignore this breakpoint and do an auto return, + * but that might confuse gdb. Also this happens + * early enough in boot up that we don't have the traps + * set up yet, so... + */ + breakpoint(); + } + kgdb_gdb_message(s, count); +} + +/* + * ------------------------------------------------------------ + * Serial KGDB driver + * ------------------------------------------------------------ + */ + +static struct console kgdbcons = { + name:"kgdb", + write:kgdb_console_write, +#ifdef CONFIG_KGDB_USER_CONSOLE + device:kgdb_console_device, +#endif + flags:CON_PRINTBUFFER | CON_ENABLED, + index:-1, +}; + +/* + * The trick here is that this file gets linked before printk.o + * That means we get to peer at the console info in the command + * line before it does. If we are up, we register, otherwise, + * do nothing. By returning 0, we allow printk to look also. + */ +static int kgdb_console_enabled; + +int __init +kgdb_console_init(char *str) +{ + if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { + register_console(&kgdbcons); + kgdb_console_enabled = 1; + } + return 0; /* let others look at the string */ +} + +__setup("console=", kgdb_console_init); + +#ifdef CONFIG_KGDB_USER_CONSOLE +static kdev_t kgdb_console_device(struct console *c); +/* This stuff sort of works, but it knocks out telnet devices + * we are leaving it here in case we (or you) find time to figure it out + * better.. + */ + +/* + * We need a real char device as well for when the console is opened for user + * space activities. + */ + +static int +kgdb_consdev_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t +kgdb_consdev_write(struct file *file, const char *buf, + size_t count, loff_t * ppos) +{ + int size, ret = 0; + static char kbuf[128]; + static DECLARE_MUTEX(sem); + + /* We are not reentrant... */ + if (down_interruptible(&sem)) + return -ERESTARTSYS; + + while (count > 0) { + /* need to copy the data from user space */ + size = count; + if (size > sizeof (kbuf)) + size = sizeof (kbuf); + if (copy_from_user(kbuf, buf, size)) { + ret = -EFAULT; + break;; + } + kgdb_console_write(&kgdbcons, kbuf, size); + count -= size; + ret += size; + buf += size; + } + + up(&sem); + + return ret; +} + +struct file_operations kgdb_consdev_fops = { + open:kgdb_consdev_open, + write:kgdb_consdev_write +}; +static kdev_t +kgdb_console_device(struct console *c) +{ + return MKDEV(TTYAUX_MAJOR, 1); +} + +/* + * This routine gets called from the serial stub in the i386/lib + * This is so it is done late in bring up (just before the console open). + */ +void +kgdb_console_finit(void) +{ + if (kgdb_console_enabled) { + char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); + char *cp = cptr; + while (*cptr && *cptr != '(') + cptr++; + *cptr = 0; + unregister_chrdev(TTYAUX_MAJOR, cp); + register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); + } +} +#endif +#endif +#ifdef CONFIG_KGDB_TS +#include /* time stamp code */ +#include /* in_interrupt */ +#ifdef CONFIG_KGDB_TS_64 +#define DATA_POINTS 64 +#endif +#ifdef CONFIG_KGDB_TS_128 +#define DATA_POINTS 128 +#endif +#ifdef CONFIG_KGDB_TS_256 +#define DATA_POINTS 256 +#endif +#ifdef CONFIG_KGDB_TS_512 +#define DATA_POINTS 512 +#endif +#ifdef CONFIG_KGDB_TS_1024 +#define DATA_POINTS 1024 +#endif +#ifndef DATA_POINTS +#define DATA_POINTS 128 /* must be a power of two */ +#endif +#define INDEX_MASK (DATA_POINTS - 1) +#if (INDEX_MASK & DATA_POINTS) +#error "CONFIG_KGDB_TS_COUNT must be a power of 2" +#endif +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + int data0; + int data1; +}; +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + struct task_struct *t1; + struct task_struct *t2; +}; +struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; + +struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; +int kgdb_and_then_count; + +void +kgdb_tstamp(int line, char *source, int data0, int data1) +{ + static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; + int flags; + kgdb_local_irq_save(flags); + spin_lock(&ts_spin); + rdtscll(kgdb_and_then->at_time); +#ifdef CONFIG_SMP + kgdb_and_then->on_cpu = smp_processor_id(); +#endif + kgdb_and_then->task = current; + kgdb_and_then->from_ln = line; + kgdb_and_then->in_src = source; + kgdb_and_then->from = __builtin_return_address(0); + kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | + (preempt_count() << 8)); + kgdb_and_then->data0 = data0; + kgdb_and_then->data1 = data1; + kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; + spin_unlock(&ts_spin); + kgdb_local_irq_restore(flags); +#ifdef CONFIG_PREEMPT + +#endif + return; +} +#endif +typedef int gdb_debug_hook(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs); +gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ + +static int kgdb_need_breakpoint[NR_CPUS]; + +void kgdb_schedule_breakpoint(void) +{ + kgdb_need_breakpoint[smp_processor_id()] = 1; +} + +void kgdb_process_breakpoint(void) +{ + /* + * Handle a breakpoint queued from inside network driver code + * to avoid reentrancy issues + */ + if (kgdb_need_breakpoint[smp_processor_id()]) { + kgdb_need_breakpoint[smp_processor_id()] = 0; + BREAKPOINT; + } +} + --- linux-2.6.3-rc2/arch/i386/kernel/ldt.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/ldt.c 2004-02-12 00:41:23.000000000 -0800 @@ -2,7 +2,7 @@ * linux/kernel/ldt.c * * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar + * Copyright (C) 1999, 2003 Ingo Molnar */ #include @@ -18,6 +18,8 @@ #include #include #include +#include +#include #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ static void flush_ldt(void *null) @@ -29,34 +31,31 @@ static void flush_ldt(void *null) static int alloc_ldt(mm_context_t *pc, int mincount, int reload) { - void *oldldt; - void *newldt; - int oldsize; + int oldsize, newsize, i; if (mincount <= pc->size) return 0; + /* + * LDT got larger - reallocate if necessary. + */ oldsize = pc->size; mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - pc->ldt = newldt; - wmb(); + newsize = mincount*LDT_ENTRY_SIZE; + for (i = 0; i < newsize; i += PAGE_SIZE) { + int nr = i/PAGE_SIZE; + BUG_ON(i >= 64*1024); + if (!pc->ldt_pages[nr]) { + pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER); + if (!pc->ldt_pages[nr]) + return -ENOMEM; + clear_highpage(pc->ldt_pages[nr]); + } + } pc->size = mincount; - wmb(); - if (reload) { #ifdef CONFIG_SMP cpumask_t mask; + preempt_disable(); load_LDT(pc); mask = cpumask_of_cpu(smp_processor_id()); @@ -67,21 +66,20 @@ static int alloc_ldt(mm_context_t *pc, i load_LDT(pc); #endif } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } return 0; } static inline int copy_ldt(mm_context_t *new, mm_context_t *old) { - int err = alloc_ldt(new, old->size, 0); - if (err < 0) + int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE; + + err = alloc_ldt(new, size, 0); + if (err < 0) { + new->size = 0; return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); + } + for (i = 0; i < nr_pages; i++) + copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0); return 0; } @@ -96,6 +94,7 @@ int init_new_context(struct task_struct init_MUTEX(&mm->context.sem); mm->context.size = 0; + memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES); old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { down(&old_mm->context.sem); @@ -107,23 +106,21 @@ int init_new_context(struct task_struct /* * No need to lock the MM as we are the last user + * Do not touch the ldt register, we are already + * in the next thread. */ void destroy_context(struct mm_struct *mm) { - if (mm->context.size) { - if (mm == current->active_mm) - clear_LDT(); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } + int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; + + for (i = 0; i < nr_pages; i++) + __free_page(mm->context.ldt_pages[i]); + mm->context.size = 0; } static int read_ldt(void __user * ptr, unsigned long bytecount) { - int err; + int err, i; unsigned long size; struct mm_struct * mm = current->mm; @@ -138,8 +135,25 @@ static int read_ldt(void __user * ptr, u size = bytecount; err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; + /* + * This is necessary just in case we got here straight from a + * context-switch where the ptes were set but no tlb flush + * was done yet. We rather avoid doing a TLB flush in the + * context-switch path and do it here instead. + */ + __flush_tlb_global(); + + for (i = 0; i < size; i += PAGE_SIZE) { + int nr = i / PAGE_SIZE, bytes; + char *kaddr = kmap(mm->context.ldt_pages[nr]); + + bytes = size - i; + if (bytes > PAGE_SIZE) + bytes = PAGE_SIZE; + if (copy_to_user(ptr + i, kaddr, size - i)) + err = -EFAULT; + kunmap(mm->context.ldt_pages[nr]); + } up(&mm->context.sem); if (err < 0) return err; @@ -158,7 +172,7 @@ static int read_default_ldt(void __user err = 0; address = &default_ldt[0]; - size = 5*sizeof(struct desc_struct); + size = 5*LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -200,7 +214,15 @@ static int write_ldt(void __user * ptr, goto out_unlock; } - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); + /* + * No rescheduling allowed from this point to the install. + * + * We do a TLB flush for the same reason as in the read_ldt() path. + */ + preempt_disable(); + __flush_tlb_global(); + lp = (__u32 *) ((ldt_info.entry_number << 3) + + (char *) __kmap_atomic_vaddr(KM_LDT_PAGE0)); /* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { @@ -221,6 +243,7 @@ install: *lp = entry_1; *(lp+1) = entry_2; error = 0; + preempt_enable(); out_unlock: up(&mm->context.sem); @@ -248,3 +271,26 @@ asmlinkage int sys_modify_ldt(int func, } return ret; } + +/* + * load one particular LDT into the current CPU + */ +void load_LDT_nolock(mm_context_t *pc, int cpu) +{ + struct page **pages = pc->ldt_pages; + int count = pc->size; + int nr_pages, i; + + if (likely(!count)) { + pages = &default_ldt_page; + count = 5; + } + nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; + + for (i = 0; i < nr_pages; i++) { + __kunmap_atomic_type(KM_LDT_PAGE0 - i); + __kmap_atomic(pages[i], KM_LDT_PAGE0 - i); + } + set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count); + load_LDT_desc(); +} --- linux-2.6.3-rc2/arch/i386/kernel/Makefile 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/Makefile 2004-02-12 00:41:23.000000000 -0800 @@ -7,13 +7,14 @@ extra-y := head.o init_task.o vmlinux.ld obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - doublefault.o + doublefault.o entry_trampoline.o obj-y += cpu/ obj-y += timers/ obj-$(CONFIG_ACPI_BOOT) += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o +obj-$(CONFIG_KGDB) += kgdb_stub.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o @@ -31,6 +32,7 @@ obj-y += sysenter.o vsyscall.o obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o obj-$(CONFIG_EFI) += efi.o efi_stub.o +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o EXTRA_AFLAGS := -traditional --- linux-2.6.3-rc2/arch/i386/kernel/microcode.c 2003-10-25 14:45:44.000000000 -0700 +++ 25/arch/i386/kernel/microcode.c 2004-02-12 00:40:36.000000000 -0800 @@ -507,3 +507,4 @@ static void __exit microcode_exit (void) module_init(microcode_init) module_exit(microcode_exit) +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); --- linux-2.6.3-rc2/arch/i386/kernel/mpparse.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/i386/kernel/mpparse.c 2004-02-12 00:41:23.000000000 -0800 @@ -634,7 +634,7 @@ void __init get_smp_config (void) /* * ACPI may be used to obtain the entire SMP configuration or just to - * enumerate/configure processors (CONFIG_ACPI_HT). Note that + * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that * ACPI supports both logical (e.g. Hyper-Threading) and physical * processors, where MPS only supports physical. */ @@ -668,7 +668,7 @@ void __init get_smp_config (void) * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc((void *)mpf->mpf_physptr)) { + if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) { smp_found_config = 0; printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); @@ -940,7 +940,7 @@ void __init mp_override_legacy_irq ( * erroneously sets the trigger to level, resulting in a HUGE * increase of timer interrupts! */ - if ((bus_irq == 0) && (global_irq == 2) && (trigger == 3)) + if ((bus_irq == 0) && (trigger == 3)) trigger = 1; intsrc.mpc_type = MP_INTSRC; @@ -961,7 +961,7 @@ void __init mp_override_legacy_irq ( * Otherwise create a new entry (e.g. global_irq == 2). */ for (i = 0; i < mp_irq_entries; i++) { - if ((mp_irqs[i].mpc_dstapic == intsrc.mpc_dstapic) + if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus) && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) { mp_irqs[i] = intsrc; found = 1; @@ -1008,9 +1008,10 @@ void __init mp_config_acpi_legacy_irqs ( */ for (i = 0; i < 16; i++) { - if (i == 2) continue; /* Don't connect IRQ2 */ + if (i == 2) + continue; /* Don't connect IRQ2 */ - intsrc.mpc_irqtype = i ? mp_INT : mp_ExtINT; /* 8259A to #0 */ + intsrc.mpc_irqtype = mp_INT; intsrc.mpc_srcbusirq = i; /* Identity mapped */ intsrc.mpc_dstirq = i; --- linux-2.6.3-rc2/arch/i386/kernel/nmi.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/nmi.c 2004-02-12 00:41:17.000000000 -0800 @@ -31,7 +31,16 @@ #include #include +#ifdef CONFIG_KGDB +#include +#ifdef CONFIG_SMP +unsigned int nmi_watchdog = NMI_IO_APIC; +#else +unsigned int nmi_watchdog = NMI_LOCAL_APIC; +#endif +#else unsigned int nmi_watchdog = NMI_NONE; +#endif static unsigned int nmi_hz = HZ; unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ extern void show_registers(struct pt_regs *regs); @@ -42,7 +51,7 @@ extern void show_registers(struct pt_reg * be enabled * -1: the lapic NMI watchdog is disabled, but can be enabled */ -static int nmi_active; +int nmi_active; #define K7_EVNTSEL_ENABLE (1 << 22) #define K7_EVNTSEL_INT (1 << 20) @@ -408,6 +417,9 @@ void touch_nmi_watchdog (void) for (i = 0; i < NR_CPUS; i++) alert_counter[i] = 0; } +#ifdef CONFIG_KGDB +int tune_watchdog = 5*HZ; +#endif void nmi_watchdog_tick (struct pt_regs * regs) { @@ -421,12 +433,24 @@ void nmi_watchdog_tick (struct pt_regs * sum = irq_stat[cpu].apic_timer_irqs; +#ifdef CONFIG_KGDB + if (! in_kgdb(regs) && last_irq_sums[cpu] == sum ) { + +#else if (last_irq_sums[cpu] == sum) { +#endif /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; +#ifdef CONFIG_KGDB + if (alert_counter[cpu] == tune_watchdog) { + kgdb_handle_exception(2, SIGPWR, 0, regs); + last_irq_sums[cpu] = sum; + alert_counter[cpu] = 0; + } +#endif if (alert_counter[cpu] == 5*nmi_hz) { spin_lock(&nmi_print_lock); /* @@ -462,6 +486,7 @@ void nmi_watchdog_tick (struct pt_regs * } } +EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(disable_lapic_nmi_watchdog); EXPORT_SYMBOL(enable_lapic_nmi_watchdog); --- linux-2.6.3-rc2/arch/i386/kernel/process.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/i386/kernel/process.c 2004-02-12 00:41:23.000000000 -0800 @@ -14,6 +14,7 @@ #define __KERNEL_SYSCALLS__ #include +#include #include #include #include @@ -47,6 +48,7 @@ #include #include #include +#include #ifdef CONFIG_MATH_EMULATION #include #endif @@ -54,6 +56,9 @@ #include #include +#include +#include + asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); int hlt_counter; @@ -132,6 +137,60 @@ static void poll_idle (void) } } +#ifdef CONFIG_HOTPLUG_CPU + +/* We don't actually take CPU down, just spin without interrupts. */ +static inline void check_cpu_quiescent(void) +{ + if (unlikely(__get_cpu_var(cpu_state) == CPU_OFFLINE)) { + int cpu = smp_processor_id(); + + spin_lock(&call_lock); + local_irq_disable(); + preempt_disable(); + + /* Ack it */ + __get_cpu_var(cpu_state) = CPU_DEAD; + + BUG_ON(cpu_isset(cpu, cpu_online_map)); + BUG_ON(!cpu_isset(cpu, cpu_active_map)); + cpu_clear(cpu, cpu_active_map); + spin_unlock(&call_lock); + + /* Death loop */ + while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + cpu_relax(); + + /* Even with irqs disabled, this is safe, since no + * smp_call_funcion can be headed for us now + * (!cpu_active). */ + spin_lock(&call_lock); + + /* from hereon we're ready to do work */ + __get_cpu_var(cpu_state) = CPU_ONLINE; + wmb(); + + //printk("Cpu %u arisen\n", smp_processor_id()); + + /* Put ourselves online before doing ___flush_tlb_all, + * so we avoid losing one to a race. */ + cpu_set(smp_processor_id(), cpu_active_map); + cpu_set(smp_processor_id(), cpu_online_map); + wmb(); + spin_unlock(&call_lock); + + __flush_tlb_all(); + local_irq_enable(); + + preempt_enable(); + } +} +#else +static inline void check_cpu_quiescent(void) +{ +} +#endif /* CONFIG_HOTPLUG_CPU */ + /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a @@ -148,6 +207,7 @@ void cpu_idle (void) if (!idle) idle = default_idle; + check_cpu_quiescent(); irq_stat[smp_processor_id()].idle_timestamp = jiffies; idle(); } @@ -304,6 +364,9 @@ void flush_thread(void) struct task_struct *tsk = current; memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); +#ifdef CONFIG_X86_HIGH_ENTRY + clear_thread_flag(TIF_DB7); +#endif memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. @@ -317,9 +380,8 @@ void release_thread(struct task_struct * if (dead_task->mm) { // temporary debugging check if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", + printk("WARNING: dead process %8s still has LDT? <%d>\n", dead_task->comm, - dead_task->mm->context.ldt, dead_task->mm->context.size); BUG(); } @@ -354,7 +416,17 @@ int copy_thread(int nr, unsigned long cl p->thread.esp = (unsigned long) childregs; p->thread.esp0 = (unsigned long) (childregs+1); + /* + * get the two stack pages, for the virtual stack. + * + * IMPORTANT: this code relies on the fact that the task + * structure is an 8K aligned piece of physical memory. + */ + p->thread.stack_page0 = virt_to_page((unsigned long)p->thread_info); + p->thread.stack_page1 = virt_to_page((unsigned long)p->thread_info + PAGE_SIZE); + p->thread.eip = (unsigned long) ret_from_fork; + p->thread_info->real_stack = p->thread_info; savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); @@ -506,10 +578,41 @@ struct task_struct * __switch_to(struct __unlazy_fpu(prev_p); +#ifdef CONFIG_X86_HIGH_ENTRY + /* + * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is + * needed because otherwise NMIs could interrupt the + * user-return code with a virtual stack and stale TLBs.) + */ + __kunmap_atomic_type(KM_VSTACK0); + __kunmap_atomic_type(KM_VSTACK1); + __kmap_atomic(next->stack_page0, KM_VSTACK0); + __kmap_atomic(next->stack_page1, KM_VSTACK1); + + /* + * NOTE: here we rely on the task being the stack as well + */ + next_p->thread_info->virtual_stack = + (void *)__kmap_atomic_vaddr(KM_VSTACK0); + +#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) + /* + * If next was preempted on entry from userspace to kernel, + * and now it's on a different cpu, we need to adjust %esp. + * This assumes that entry.S does not copy %esp while on the + * virtual stack (with interrupts enabled): which is so, + * except within __SWITCH_KERNELSPACE itself. + */ + if (unlikely(next->esp >= TASK_SIZE)) { + next->esp &= THREAD_SIZE - 1; + next->esp |= (unsigned long) next_p->thread_info->virtual_stack; + } +#endif +#endif /* * Reload esp0, LDT and the page table pointer: */ - load_esp0(tss, next); + load_virtual_esp0(tss, next_p); /* * Load the per-thread Thread-Local Storage descriptor. @@ -637,6 +740,8 @@ extern void scheduling_functions_start_h extern void scheduling_functions_end_here(void); #define first_sched ((unsigned long) scheduling_functions_start_here) #define last_sched ((unsigned long) scheduling_functions_end_here) +#define top_esp (THREAD_SIZE - sizeof(unsigned long)) +#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) unsigned long get_wchan(struct task_struct *p) { @@ -647,12 +752,12 @@ unsigned long get_wchan(struct task_stru return 0; stack_page = (unsigned long)p->thread_info; esp = p->thread.esp; - if (!stack_page || esp < stack_page || esp > 8188+stack_page) + if (!stack_page || esp < stack_page || esp > top_esp+stack_page) return 0; /* include/asm-i386/system.h:switch_to() pushes ebp last. */ ebp = *(unsigned long *) esp; do { - if (ebp < stack_page || ebp > 8184+stack_page) + if (ebp < stack_page || ebp > top_ebp+stack_page) return 0; eip = *(unsigned long *) (ebp+4); if (eip < first_sched || eip >= last_sched) --- linux-2.6.3-rc2/arch/i386/kernel/reboot.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/reboot.c 2004-02-12 00:41:23.000000000 -0800 @@ -155,12 +155,11 @@ void machine_real_restart(unsigned char CMOS_WRITE(0x00, 0x8f); spin_unlock_irqrestore(&rtc_lock, flags); - /* Remap the kernel at virtual address zero, as well as offset zero - from the kernel segment. This assumes the kernel segment starts at - virtual address PAGE_OFFSET. */ - - memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); + /* + * Remap the first 16 MB of RAM (which includes the kernel image) + * at virtual address zero: + */ + setup_identity_mappings(swapper_pg_dir, 0, 16*1024*1024); /* * Use `swapper_pg_dir' as our page directory. --- linux-2.6.3-rc2/arch/i386/kernel/setup.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/i386/kernel/setup.c 2004-02-12 00:41:12.000000000 -0800 @@ -588,9 +588,12 @@ static void __init parse_cmdline_early ( #endif /* CONFIG_ACPI_BOOT */ /* - * highmem=size forces highmem to be exactly 'size' bytes. + * highmem=size forces highmem to be at most 'size' bytes. * This works even on boxes that have no highmem otherwise. * This also works to reduce highmem size on bigger boxes. + * + * Note: highmem sise is adjusted downward for proper zone + * alignment of the highmem physical start address. */ if (c == ' ' && !memcmp(from, "highmem=", 8)) highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; @@ -657,6 +660,11 @@ void __init find_max_pfn(void) /* * Determine low and high memory ranges: */ + +#define ZONE_REQUIRED_PAGE_ALIGNMENT (1UL << (MAX_ORDER-1)) +#define ZONE_REQUIRED_PAGE_ALIGNMENT_MASK (ZONE_REQUIRED_PAGE_ALIGNMENT-1) +#define PAGES_FOR_64MB (64*1024*1024/PAGE_SIZE) + unsigned long __init find_max_low_pfn(void) { unsigned long max_low_pfn; @@ -668,14 +676,16 @@ unsigned long __init find_max_low_pfn(vo if (highmem_pages + MAXMEM_PFN < max_pfn) max_pfn = MAXMEM_PFN + highmem_pages; if (highmem_pages + MAXMEM_PFN > max_pfn) { - printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages)); - highmem_pages = 0; + printk("Warning reducing highmem=%uMB to: %luMB.\n", + pages_to_mb(highmem_pages), + pages_to_mb((max_pfn - MAXMEM_PFN))); + highmem_pages = max_pfn - MAXMEM_PFN; } max_low_pfn = MAXMEM_PFN; #ifndef CONFIG_HIGHMEM /* Maximum memory usable is what is directly addressable */ - printk(KERN_WARNING "Warning only %ldMB will be used.\n", - MAXMEM>>20); + printk(KERN_WARNING "Warning only %luMB will be used.\n", + MAXMEM >> 20); if (max_pfn > MAX_NONPAE_PFN) printk(KERN_WARNING "Use a PAE enabled kernel.\n"); else @@ -690,26 +700,63 @@ unsigned long __init find_max_low_pfn(vo } #endif /* !CONFIG_X86_PAE */ #endif /* !CONFIG_HIGHMEM */ - } else { - if (highmem_pages == -1) - highmem_pages = 0; + } else if (highmem_pages == -1) + highmem_pages = 0; #ifdef CONFIG_HIGHMEM - if (highmem_pages >= max_pfn) { - printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn)); - highmem_pages = 0; - } - if (highmem_pages) { - if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){ - printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages)); - highmem_pages = 0; - } - max_low_pfn -= highmem_pages; - } + if (!highmem_pages) + goto out; + if (max_pfn < PAGES_FOR_64MB + ZONE_REQUIRED_PAGE_ALIGNMENT * 2) { + printk(KERN_ERR "Error highmem support requires at least %luMB " + "but only %luMB are available.\n", + pages_to_mb(PAGES_FOR_64MB + + ZONE_REQUIRED_PAGE_ALIGNMENT * 2), + pages_to_mb(max_pfn)); + highmem_pages = 0; + goto out; + } + if (highmem_pages > max_pfn) { + printk(KERN_WARNING "Warning highmem=%uMB is bigger than " + "available %luMB and will be adjusted.\n", + pages_to_mb(highmem_pages), pages_to_mb(max_pfn)); + } + if (highmem_pages <= ZONE_REQUIRED_PAGE_ALIGNMENT) { + printk(KERN_WARNING "Warning highmem=%uMB is too small and has " + "been adjusted to: %luMB.\n", + pages_to_mb(highmem_pages), + pages_to_mb(ZONE_REQUIRED_PAGE_ALIGNMENT * 2)); + highmem_pages = ZONE_REQUIRED_PAGE_ALIGNMENT * 2; + } + if (max_low_pfn < highmem_pages || + max_low_pfn-highmem_pages < PAGES_FOR_64MB){ + highmem_pages = max_low_pfn - PAGES_FOR_64MB; + printk(KERN_WARNING "Warning highmem size adjusted for a " + "minimum of 64MB lowmem to: %uMB.\n", + pages_to_mb(highmem_pages)); + } + max_low_pfn -= highmem_pages; + + if (max_low_pfn & ZONE_REQUIRED_PAGE_ALIGNMENT_MASK) { + printk(KERN_WARNING "Warning bad highmem zone alignment 0x%lx, " + "highmem size will be adjusted.\n", + (max_low_pfn & ZONE_REQUIRED_PAGE_ALIGNMENT_MASK) << + PAGE_SHIFT); + highmem_pages -= ZONE_REQUIRED_PAGE_ALIGNMENT - + (max_low_pfn & ZONE_REQUIRED_PAGE_ALIGNMENT_MASK); + max_low_pfn &= ~ZONE_REQUIRED_PAGE_ALIGNMENT_MASK; + max_low_pfn += ZONE_REQUIRED_PAGE_ALIGNMENT; + printk(KERN_WARNING "Warning lowmem size adjusted for zone " + "alignment to: %luMB.\n", + pages_to_mb(max_low_pfn)); + printk(KERN_WARNING "Warning highmem size adjusted for zone " + "alignment to: %uMB.\n", + pages_to_mb(highmem_pages)); + } +out: #else - if (highmem_pages) - printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n"); + if (highmem_pages) + printk(KERN_ERR "ignoring highmem size on non-highmem " + "kernel!\n"); #endif - } return max_low_pfn; } @@ -1118,6 +1165,19 @@ void __init setup_arch(char **cmdline_p) #endif paging_init(); +#ifdef CONFIG_EARLY_PRINTK + { + char *s = strstr(*cmdline_p, "earlyprintk="); + if (s) { + extern void setup_early_printk(char *); + + setup_early_printk(s); + printk("early console enabled\n"); + } + } +#endif + + dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH --- linux-2.6.3-rc2/arch/i386/kernel/signal.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/i386/kernel/signal.c 2004-02-12 00:41:23.000000000 -0800 @@ -128,28 +128,29 @@ sys_sigaltstack(const stack_t __user *us */ static int -restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax) +restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *__sc, int *peax) { - unsigned int err = 0; + struct sigcontext scratch; /* 88 bytes of scratch area */ /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#define COPY(x) err |= __get_user(regs->x, &sc->x) + if (copy_from_user(&scratch, __sc, sizeof(scratch))) + return -EFAULT; + +#define COPY(x) regs->x = scratch.x #define COPY_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + { unsigned short tmp = scratch.seg; \ regs->x##seg = tmp; } #define COPY_SEG_STRICT(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + { unsigned short tmp = scratch.seg; \ regs->x##seg = tmp|3; } #define GET_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + { unsigned short tmp = scratch.seg; \ loadsegment(seg,tmp); } GET_SEG(gs); @@ -168,27 +169,23 @@ restore_sigcontext(struct pt_regs *regs, COPY_SEG_STRICT(ss); { - unsigned int tmpflags; - err |= __get_user(tmpflags, &sc->eflags); + unsigned int tmpflags = scratch.eflags; regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); regs->orig_eax = -1; /* disable syscall checks */ } { - struct _fpstate __user * buf; - err |= __get_user(buf, &sc->fpstate); + struct _fpstate * buf = scratch.fpstate; if (buf) { if (verify_area(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); + return -EFAULT; + if (restore_i387(buf)) + return -EFAULT; } } - err |= __get_user(*peax, &sc->eax); - return err; - -badframe: - return 1; + *peax = scratch.eax; + return 0; } asmlinkage int sys_sigreturn(unsigned long __unused) @@ -266,46 +263,47 @@ badframe: */ static int -setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, +setup_sigcontext(struct sigcontext __user *__sc, struct _fpstate __user *fpstate, struct pt_regs *regs, unsigned long mask) { - int tmp, err = 0; + struct sigcontext sc; /* 88 bytes of scratch area */ + int tmp; tmp = 0; __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); - err |= __put_user(tmp, (unsigned int *)&sc->gs); + *(unsigned int *)&sc.gs = tmp; __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); - err |= __put_user(tmp, (unsigned int *)&sc->fs); - - err |= __put_user(regs->xes, (unsigned int *)&sc->es); - err |= __put_user(regs->xds, (unsigned int *)&sc->ds); - err |= __put_user(regs->edi, &sc->edi); - err |= __put_user(regs->esi, &sc->esi); - err |= __put_user(regs->ebp, &sc->ebp); - err |= __put_user(regs->esp, &sc->esp); - err |= __put_user(regs->ebx, &sc->ebx); - err |= __put_user(regs->edx, &sc->edx); - err |= __put_user(regs->ecx, &sc->ecx); - err |= __put_user(regs->eax, &sc->eax); - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->eip, &sc->eip); - err |= __put_user(regs->xcs, (unsigned int *)&sc->cs); - err |= __put_user(regs->eflags, &sc->eflags); - err |= __put_user(regs->esp, &sc->esp_at_signal); - err |= __put_user(regs->xss, (unsigned int *)&sc->ss); + *(unsigned int *)&sc.fs = tmp; + *(unsigned int *)&sc.es = regs->xes; + *(unsigned int *)&sc.ds = regs->xds; + sc.edi = regs->edi; + sc.esi = regs->esi; + sc.ebp = regs->ebp; + sc.esp = regs->esp; + sc.ebx = regs->ebx; + sc.edx = regs->edx; + sc.ecx = regs->ecx; + sc.eax = regs->eax; + sc.trapno = current->thread.trap_no; + sc.err = current->thread.error_code; + sc.eip = regs->eip; + *(unsigned int *)&sc.cs = regs->xcs; + sc.eflags = regs->eflags; + sc.esp_at_signal = regs->esp; + *(unsigned int *)&sc.ss = regs->xss; tmp = save_i387(fpstate); if (tmp < 0) - err = 1; - else - err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); + return 1; + sc.fpstate = tmp ? fpstate : NULL; /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); + sc.oldmask = mask; + sc.cr2 = current->thread.cr2; - return err; + if (copy_to_user(__sc, &sc, sizeof(sc))) + return 1; + return 0; } /* @@ -443,7 +441,7 @@ static void setup_rt_frame(int sig, stru /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(current->sas_ss_sp, (unsigned long *)&frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->esp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); --- linux-2.6.3-rc2/arch/i386/kernel/smpboot.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/i386/kernel/smpboot.c 2004-02-12 00:40:41.000000000 -0800 @@ -39,10 +39,14 @@ #include #include +#include #include #include #include #include +#include +#include +#include #include #include @@ -65,7 +69,12 @@ int phys_proc_id[NR_CPUS]; /* Package ID /* bitmap of online cpus */ cpumask_t cpu_online_map; -static cpumask_t cpu_callin_map; +#ifdef CONFIG_HOTPLUG_CPU +cpumask_t cpu_active_map; +#endif + +/* Initialize, although master cpu never calls in */ +static volatile cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; static cpumask_t smp_commenced_mask; @@ -83,6 +92,9 @@ extern unsigned char trampoline_data []; extern unsigned char trampoline_end []; static unsigned char *trampoline_base; +/* State of each CPU. */ +DEFINE_PER_CPU(int, cpu_state) = { 0 }; + /* * Currently trivial. Write the real->protected mode * bootstrap into the page concerned. The caller @@ -457,7 +469,9 @@ int __init start_secondary(void *unused) * the local TLBs too. */ local_flush_tlb(); + /* cpu_set should suffice for this stage of bootup -zwane*/ cpu_set(smp_processor_id(), cpu_online_map); + cpu_set(smp_processor_id(), cpu_active_map); wmb(); return cpu_idle(); } @@ -934,7 +948,7 @@ static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; -int cpu_sibling_map[NR_CPUS] __cacheline_aligned; +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; static void __init smp_boot_cpus(unsigned int max_cpus) { @@ -952,6 +966,8 @@ static void __init smp_boot_cpus(unsigne current_thread_info()->cpu = 0; smp_tune_scheduling(); + cpus_clear(cpu_sibling_map[0]); + cpu_set(0, cpu_sibling_map[0]); /* * If we couldn't find an SMP configuration at boot time, @@ -1080,32 +1096,34 @@ static void __init smp_boot_cpus(unsigne Dprintk("Boot done.\n"); /* - * If Hyper-Threading is avaialble, construct cpu_sibling_map[], so - * that we can tell the sibling CPU efficiently. + * construct cpu_sibling_map[], so that we can tell sibling CPUs + * efficiently. */ - if (cpu_has_ht && smp_num_siblings > 1) { - for (cpu = 0; cpu < NR_CPUS; cpu++) - cpu_sibling_map[cpu] = NO_PROC_ID; - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - int i; - if (!cpu_isset(cpu, cpu_callout_map)) - continue; + for (cpu = 0; cpu < NR_CPUS; cpu++) + cpus_clear(cpu_sibling_map[cpu]); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + int siblings = 0; + int i; + if (!cpu_isset(cpu, cpu_callout_map)) + continue; + if (smp_num_siblings > 1) { for (i = 0; i < NR_CPUS; i++) { - if (i == cpu || !cpu_isset(i, cpu_callout_map)) + if (!cpu_isset(i, cpu_callout_map)) continue; if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_sibling_map[cpu] = i; - printk("cpu_sibling_map[%d] = %d\n", cpu, cpu_sibling_map[cpu]); - break; + siblings++; + cpu_set(i, cpu_sibling_map[cpu]); } } - if (cpu_sibling_map[cpu] == NO_PROC_ID) { - smp_num_siblings = 1; - printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu); - } + } else { + siblings++; + cpu_set(cpu, cpu_sibling_map[cpu]); } + + if (siblings != smp_num_siblings) + printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); } smpboot_setup_io_apic(); @@ -1119,33 +1137,325 @@ static void __init smp_boot_cpus(unsigne synchronize_tsc_bp(); } +#ifdef CONFIG_SCHED_SMT +#ifdef CONFIG_NUMA +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static struct sched_group sched_group_nodes[MAX_NUMNODES]; +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static DEFINE_PER_CPU(struct sched_domain, node_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first_cpu = NULL, *last_cpu = NULL; + + /* Set up domains */ + for_each_cpu_mask(i, cpu_online_map) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_domain *node_domain = &per_cpu(node_domains, i); + int node = cpu_to_node(i); + cpumask_t nodemask = node_to_cpumask(node); + + *cpu_domain = SD_SIBLING_INIT; + cpu_domain->span = cpu_sibling_map[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = nodemask; + phys_domain->flags |= SD_FLAG_IDLE; + + *node_domain = SD_NODE_INIT; + node_domain->span = cpu_online_map; + } + + /* Set up CPU (sibling) groups */ + for_each_cpu_mask(i, cpu_online_map) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + int j; + first_cpu = last_cpu = NULL; + + if (i != first_cpu(cpu_domain->span)) + continue; + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpu->cpumask = CPU_MASK_NONE; + cpu_set(j, cpu->cpumask); + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + } + + for (i = 0; i < MAX_NUMNODES; i++) { + int j; + cpumask_t nodemask; + cpus_and(nodemask, node_to_cpumask(i), cpu_online_map); + + first_cpu = last_cpu = NULL; + /* Set up physical groups */ + for_each_cpu_mask(j, nodemask) { + struct sched_domain *cpu_domain = cpu_sched_domain(j); + struct sched_group *cpu = &sched_group_phys[j]; + + if (j != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + } + + /* Set up nodes */ + first_cpu = last_cpu = NULL; + for (i = 0; i < MAX_NUMNODES; i++) { + struct sched_group *cpu = &sched_group_nodes[i]; + cpumask_t nodemask; + cpus_and(nodemask, node_to_cpumask(i), cpu_online_map); + + if (cpus_empty(nodemask)) + continue; + + cpu->cpumask = nodemask; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + + + mb(); + for_each_cpu_mask(i, cpu_online_map) { + int node = cpu_to_node(i); + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_domain *node_domain = &per_cpu(node_domains, i); + struct sched_group *cpu_group = &sched_group_cpus[i]; + struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)]; + struct sched_group *node_group = &sched_group_nodes[node]; + + cpu_domain->parent = phys_domain; + phys_domain->parent = node_domain; + + node_domain->groups = node_group; + phys_domain->groups = phys_group; + cpu_domain->groups = cpu_group; + } +} +#else /* CONFIG_NUMA */ +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first_cpu = NULL, *last_cpu = NULL; + + /* Set up domains */ + for_each_cpu_mask(i, cpu_online_map) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + + *cpu_domain = SD_SIBLING_INIT; + cpu_domain->span = cpu_sibling_map[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = cpu_online_map; + phys_domain->flags |= SD_FLAG_IDLE; + } + + /* Set up CPU (sibling) groups */ + for_each_cpu_mask(i, cpu_online_map) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + int j; + first_cpu = last_cpu = NULL; + + if (i != first_cpu(cpu_domain->span)) + continue; + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpus_clear(cpu->cpumask); + cpu_set(j, cpu->cpumask); + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + } + + first_cpu = last_cpu = NULL; + /* Set up physical groups */ + for_each_cpu_mask(i, cpu_online_map) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_group *cpu = &sched_group_phys[i]; + + if (i != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + + mb(); + for_each_cpu_mask(i, cpu_online_map) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_group *cpu_group = &sched_group_cpus[i]; + struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)]; + cpu_domain->parent = phys_domain; + phys_domain->groups = phys_group; + cpu_domain->groups = cpu_group; + } +} +#endif /* CONFIG_NUMA */ +#endif /* CONFIG_SCHED_SMT */ + /* These are wrappers to interface to the new boot process. Someone who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ void __init smp_prepare_cpus(unsigned int max_cpus) { + smp_commenced_mask = cpumask_of_cpu(0); + cpu_callin_map = cpumask_of_cpu(0); + mb(); smp_boot_cpus(max_cpus); } void __devinit smp_prepare_boot_cpu(void) { cpu_set(smp_processor_id(), cpu_online_map); + cpu_set(smp_processor_id(), cpu_active_map); cpu_set(smp_processor_id(), cpu_callout_map); } +#ifdef CONFIG_HOTPLUG_CPU +/* must be called with the cpucontrol mutex held */ +static int __devinit cpu_enable(unsigned int cpu) +{ + /* get the target out of it's holding state */ + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + wmb(); + + /* wait for the processor to ack it. timeout? */ + while (!cpu_online(cpu)) + cpu_relax(); + return 0; +} + +int __cpu_disable(void) +{ + int cpu = smp_processor_id(); + /* + * Nothing for now, perhaps use cpufreq to drop frequency, + * but that could go into generic code. + * + * We won't take down the boot processor on i386 due to some + * interrupts only being able to be serviced by the BSP. + * Especially so if we're not using an IOAPIC -zwane + */ + if (cpu == 0) + return -EBUSY; + + BUG_ON(!cpu_isset(cpu, cpu_active_map)); + BUG_ON(!cpu_isset(cpu, cpu_online_map)); + wmb(); + cpu_clear(cpu, cpu_online_map); + wmb(); + + return 0; +} + +static void do_nothing(void *unused) +{ + return; +} + +void __cpu_die(unsigned int cpu) +{ + unsigned int i; + + /* Final threads can take some time to actually clean up */ + while (!idle_cpu(cpu)) + yield(); + + per_cpu(cpu_state, cpu) = CPU_OFFLINE; + wmb(); + for (i = 0; i < 10; i++) { + wake_idle_cpu(cpu); + /* They ack this in check_cpu_quiescent by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) { + /* + * This prevents a race in smp_call_function due + * to the rapid online of the same CPU which just died. + */ + smp_call_function(do_nothing, NULL, 1, 1); + return; + } + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ/10); + } + printk(KERN_ERR "CPU %u didn't die...\n", cpu); +} +#else /* ... !CONFIG_HOTPLUG_CPU */ +int __cpu_disable(void) +{ + return -ENOSYS; +} + +void __cpu_die(unsigned int cpu) +{ + /* We said "no" in __cpu_disable */ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + int __devinit __cpu_up(unsigned int cpu) { /* This only works at boot for x86. See "rewrite" above. */ - if (cpu_isset(cpu, smp_commenced_mask)) { + if (cpu_isset(cpu, smp_commenced_mask) && cpu_online(cpu)) { local_irq_enable(); return -ENOSYS; } /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { + printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); local_irq_enable(); return -EIO; } +#ifdef CONFIG_HOTPLUG_CPU + /* Already up, and in cpu_quiescent now? */ + if (cpu_isset(cpu, smp_commenced_mask)) { + cpu_enable(cpu); + /* we can simply fall through */ + } +#endif + local_irq_enable(); /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); --- linux-2.6.3-rc2/arch/i386/kernel/smp.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/smp.c 2004-02-12 00:41:23.000000000 -0800 @@ -327,10 +327,12 @@ asmlinkage void smp_invalidate_interrupt if (flush_mm == cpu_tlbstate[cpu].active_mm) { if (cpu_tlbstate[cpu].state == TLBSTATE_OK) { +#ifndef CONFIG_X86_SWITCH_PAGETABLES if (flush_va == FLUSH_ALL) local_flush_tlb(); else __flush_tlb_one(flush_va); +#endif } else leave_mm(cpu); } @@ -355,11 +357,15 @@ static void flush_tlb_others(cpumask_t c */ BUG_ON(cpus_empty(cpumask)); - cpus_and(tmp, cpumask, cpu_online_map); + cpus_and(tmp, cpumask, cpu_callout_map); BUG_ON(!cpus_equal(cpumask, tmp)); BUG_ON(cpu_isset(smp_processor_id(), cpumask)); BUG_ON(!mm); + cpus_and(cpumask, cpumask, cpu_active_map); + if (cpus_empty(cpumask)) + return; + /* * i'm not happy about this global shared spinlock in the * MM hot path, but we'll see how contended it is. @@ -387,30 +393,17 @@ static void flush_tlb_others(cpumask_t c */ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); - while (!cpus_empty(flush_cpumask)) - /* nothing. lockup detection does not belong here */ + do { mb(); + tmp = flush_cpumask; + cpus_and(tmp, tmp, cpu_active_map); + } while (!cpus_empty(tmp)); flush_mm = NULL; flush_va = 0; spin_unlock(&tlbstate_lock); } -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); - preempt_enable(); -} - void flush_tlb_mm (struct mm_struct * mm) { cpumask_t cpu_mask; @@ -442,7 +435,10 @@ void flush_tlb_page(struct vm_area_struc if (current->active_mm == mm) { if(current->mm) - __flush_tlb_one(va); +#ifndef CONFIG_X86_SWITCH_PAGETABLES + __flush_tlb_one(va) +#endif + ; else leave_mm(smp_processor_id()); } @@ -466,7 +462,17 @@ void flush_tlb_all(void) { on_each_cpu(do_flush_tlb_all, 0, 1, 1); } - +#ifdef CONFIG_KGDB +/* + * By using the NMI code instead of a vector we just sneak thru the + * word generator coming out with just what we want. AND it does + * not matter if clustered_apic_mode is set or not. + */ +void smp_send_nmi_allbutself(void) +{ + send_IPI_allbutself(APIC_DM_NMI); +} +#endif /* * this function sends a 'reschedule' IPI to another CPU. * it goes straight through and wastes no time serializing @@ -481,13 +487,13 @@ void smp_send_reschedule(int cpu) * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. */ -static spinlock_t call_lock = SPIN_LOCK_UNLOCKED; +spinlock_t call_lock = SPIN_LOCK_UNLOCKED; struct call_data_struct { void (*func) (void *info); void *info; - atomic_t started; - atomic_t finished; + cpumask_t not_started; + cpumask_t not_finished; int wait; }; @@ -514,32 +520,44 @@ int smp_call_function (void (*func) (voi */ { struct call_data_struct data; - int cpus = num_online_cpus()-1; + cpumask_t mask; + int cpu; - if (!cpus) - return 0; + spin_lock(&call_lock); + cpu = smp_processor_id(); data.func = func; data.info = info; - atomic_set(&data.started, 0); + data.not_started = cpu_active_map; + cpu_clear(cpu, data.not_started); + if (cpus_empty(data.not_started)) + goto out_unlock; + data.wait = wait; if (wait) - atomic_set(&data.finished, 0); + data.not_finished = data.not_started; - spin_lock(&call_lock); call_data = &data; mb(); /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself(CALL_FUNCTION_VECTOR); + send_IPI_mask(data.not_started, CALL_FUNCTION_VECTOR); /* Wait for response */ - while (atomic_read(&data.started) != cpus) - barrier(); + do { + mb(); + mask = data.not_started; + cpus_and(mask, mask, cpu_active_map); + } while(!cpus_empty(mask)); if (wait) - while (atomic_read(&data.finished) != cpus) - barrier(); + do { + mb(); + mask = data.not_finished; + cpus_and(mask, mask, cpu_active_map); + } while(!cpus_empty(mask)); + +out_unlock: spin_unlock(&call_lock); return 0; @@ -551,6 +569,7 @@ static void stop_this_cpu (void * dummy) * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); + cpu_clear(smp_processor_id(), cpu_active_map); local_irq_disable(); disable_local_APIC(); if (cpu_data[smp_processor_id()].hlt_works_ok) @@ -583,17 +602,25 @@ asmlinkage void smp_reschedule_interrupt asmlinkage void smp_call_function_interrupt(void) { - void (*func) (void *info) = call_data->func; - void *info = call_data->info; - int wait = call_data->wait; + void (*func) (void *info); + void *info; + int wait; + int cpu = smp_processor_id(); ack_APIC_irq(); + + func = call_data->func; + info = call_data->info; + wait = call_data->wait; + /* * Notify initiating CPU that I've grabbed the data and am * about to execute the function */ - mb(); - atomic_inc(&call_data->started); + smp_mb__before_clear_bit(); + cpu_clear(cpu, call_data->not_started); + smp_mb__after_clear_bit(); + /* * At this point the info structure may be out of scope unless wait==1 */ @@ -602,8 +629,9 @@ asmlinkage void smp_call_function_interr irq_exit(); if (wait) { - mb(); - atomic_inc(&call_data->finished); + smp_mb__before_clear_bit(); + cpu_clear(cpu, call_data->not_finished); + smp_mb__after_clear_bit(); } } --- linux-2.6.3-rc2/arch/i386/kernel/sysenter.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/sysenter.c 2004-02-12 00:41:23.000000000 -0800 @@ -18,13 +18,18 @@ #include #include #include +#include extern asmlinkage void sysenter_entry(void); void enable_sep_cpu(void *info) { int cpu = get_cpu(); +#ifdef CONFIG_X86_HIGH_ENTRY + struct tss_struct *tss = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu; +#else struct tss_struct *tss = init_tss + cpu; +#endif tss->ss1 = __KERNEL_CS; tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; --- linux-2.6.3-rc2/arch/i386/kernel/sys_i386.c 2003-06-14 12:18:35.000000000 -0700 +++ 25/arch/i386/kernel/sys_i386.c 2004-02-12 00:40:54.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -106,8 +107,6 @@ out: } -extern asmlinkage int sys_select(int, fd_set __user *, fd_set __user *, fd_set __user *, struct timeval __user *); - struct sel_arg_struct { unsigned long n; fd_set __user *inp, *outp, *exp; --- linux-2.6.3-rc2/arch/i386/kernel/timers/common.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/timers/common.c 2004-02-12 00:40:02.000000000 -0800 @@ -137,3 +137,23 @@ bad_calibration: } #endif +/* calculate cpu_khz */ +void __init init_cpu_khz(void) +{ + if (cpu_has_tsc) { + unsigned long tsc_quotient = calibrate_tsc(); + if (tsc_quotient) { + /* report CPU clock rate in Hz. + * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = + * clock/second. Our precision is about 100 ppm. + */ + { unsigned long eax=0, edx=1000; + __asm__("divl %2" + :"=a" (cpu_khz), "=d" (edx) + :"r" (tsc_quotient), + "0" (eax), "1" (edx)); + printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + } + } + } +} --- linux-2.6.3-rc2/arch/i386/kernel/timers/Makefile 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/timers/Makefile 2004-02-12 00:40:02.000000000 -0800 @@ -6,3 +6,4 @@ obj-y := timer.o timer_none.o timer_tsc. obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o obj-$(CONFIG_HPET_TIMER) += timer_hpet.o +obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o --- linux-2.6.3-rc2/arch/i386/kernel/timers/timer.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/i386/kernel/timers/timer.c 2004-02-12 00:40:02.000000000 -0800 @@ -19,6 +19,9 @@ static struct timer_opts* timers[] = { #ifdef CONFIG_HPET_TIMER &timer_hpet, #endif +#ifdef CONFIG_X86_PM_TIMER + &timer_pmtmr, +#endif &timer_tsc, &timer_pit, NULL, --- linux-2.6.3-rc2/arch/i386/kernel/timers/timer_cyclone.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/timers/timer_cyclone.c 2004-02-12 00:40:02.000000000 -0800 @@ -212,26 +212,7 @@ static int __init init_cyclone(char* ove } } - /* init cpu_khz. - * XXX - This should really be done elsewhere, - * and in a more generic fashion. -johnstul@us.ibm.com - */ - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc(); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. - */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); - } - } - } + init_cpu_khz(); /* Everything looks good! */ return 0; --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/timers/timer_pm.c 2004-02-12 00:40:03.000000000 -0800 @@ -0,0 +1,217 @@ +/* + * (C) Dominik Brodowski 2003 + * + * Driver to use the Power Management Timer (PMTMR) available in some + * southbridges as primary timing source for the Linux kernel. + * + * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, + * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. + * + * This file is licensed under the GPL v2. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* The I/O port the PMTMR resides at. + * The location is detected during setup_arch(), + * in arch/i386/acpi/boot.c */ +u32 pmtmr_ioport = 0; + + +/* value of the Power timer at last timer interrupt */ +static u32 offset_tick; +static u32 offset_delay; + +static unsigned long long monotonic_base; +static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; + +#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ + +/*helper function to safely read acpi pm timesource*/ +static inline u32 read_pmtmr(void) +{ + u32 v1=0,v2=0,v3=0; + /* It has been reported that because of various broken + * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time + * source is not latched, so you must read it multiple + * times to insure a safe value is read. + */ + do { + v1 = inl(pmtmr_ioport); + v2 = inl(pmtmr_ioport); + v3 = inl(pmtmr_ioport); + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + /* mask the output to 24 bits */ + return v2 & ACPI_PM_MASK; +} + +static int init_pmtmr(char* override) +{ + u32 value1, value2; + unsigned int i; + + if (override[0] && strncmp(override,"pmtmr",5)) + return -ENODEV; + + if (!pmtmr_ioport) + return -ENODEV; + + /* we use the TSC for delay_pmtmr, so make sure it exists */ + if (!cpu_has_tsc) + return -ENODEV; + + /* "verify" this timing source */ + value1 = read_pmtmr(); + for (i = 0; i < 10000; i++) { + value2 = read_pmtmr(); + if (value2 == value1) + continue; + if (value2 > value1) + goto pm_good; + if ((value2 < value1) && ((value2) < 0xFFF)) + goto pm_good; + printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); + return -EINVAL; + } + printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); + return -ENODEV; + +pm_good: + init_cpu_khz(); + return 0; +} + +static inline u32 cyc2us(u32 cycles) +{ + /* The Power Management Timer ticks at 3.579545 ticks per microsecond. + * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] + * + * Even with HZ = 100, delta is at maximum 35796 ticks, so it can + * easily be multiplied with 286 (=0x11E) without having to fear + * u32 overflows. + */ + cycles *= 286; + return (cycles >> 10); +} + +/* + * this gets called during each timer interrupt + * - Called while holding the writer xtime_lock + */ +static void mark_offset_pmtmr(void) +{ + u32 lost, delta, last_offset; + static int first_run = 1; + last_offset = offset_tick; + + write_seqlock(&monotonic_lock); + + offset_tick = read_pmtmr(); + + /* calculate tick interval */ + delta = (offset_tick - last_offset) & ACPI_PM_MASK; + + /* convert to usecs */ + delta = cyc2us(delta); + + /* update the monotonic base value */ + monotonic_base += delta * NSEC_PER_USEC; + write_sequnlock(&monotonic_lock); + + /* convert to ticks */ + delta += offset_delay; + lost = delta / (USEC_PER_SEC / HZ); + offset_delay = delta % (USEC_PER_SEC / HZ); + + + /* compensate for lost ticks */ + if (lost >= 2) + jiffies_64 += lost - 1; + + /* don't calculate delay for first run, + or if we've got less then a tick */ + if (first_run || (lost < 1)) { + first_run = 0; + offset_delay = 0; + } +} + + +static unsigned long long monotonic_clock_pmtmr(void) +{ + u32 last_offset, this_offset; + unsigned long long base, ret; + unsigned seq; + + + /* atomically read monotonic base & last_offset */ + do { + seq = read_seqbegin(&monotonic_lock); + last_offset = offset_tick; + base = monotonic_base; + } while (read_seqretry(&monotonic_lock, seq)); + + /* Read the pmtmr */ + this_offset = read_pmtmr(); + + /* convert to nanoseconds */ + ret = (this_offset - last_offset) & ACPI_PM_MASK; + ret = base + (cyc2us(ret) * NSEC_PER_USEC); + return ret; +} + +static void delay_pmtmr(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do + { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + + +/* + * get the offset (in microseconds) from the last call to mark_offset() + * - Called holding a reader xtime_lock + */ +static unsigned long get_offset_pmtmr(void) +{ + u32 now, offset, delta = 0; + + offset = offset_tick; + now = read_pmtmr(); + delta = (now - offset)&ACPI_PM_MASK; + + return (unsigned long) offset_delay + cyc2us(delta); +} + + +/* acpi timer_opts struct */ +struct timer_opts timer_pmtmr = { + .name = "pmtmr", + .init = init_pmtmr, + .mark_offset = mark_offset_pmtmr, + .get_offset = get_offset_pmtmr, + .monotonic_clock = monotonic_clock_pmtmr, + .delay = delay_pmtmr, +}; + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dominik Brodowski "); +MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); --- linux-2.6.3-rc2/arch/i386/kernel/traps.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/traps.c 2004-02-12 00:41:23.000000000 -0800 @@ -54,12 +54,8 @@ #include "mach_traps.h" -asmlinkage int system_call(void); -asmlinkage void lcall7(void); -asmlinkage void lcall27(void); - -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 } }; +struct desc_struct default_ldt[] __attribute__((__section__(".data.default_ldt"))) = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }; +struct page *default_ldt_page; /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq = 0; @@ -91,6 +87,43 @@ asmlinkage void alignment_check(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); +#ifdef CONFIG_KGDB +extern void sysenter_entry(void); +#include +#include +extern void int3(void); +extern void debug(void); +void set_intr_gate(unsigned int n, void *addr); +static void set_intr_usr_gate(unsigned int n, void *addr); +/* + * Should be able to call this breakpoint() very early in + * bring up. Just hard code the call where needed. + * The breakpoint() code is here because set_?_gate() functions + * are local (static) to trap.c. They need be done only once, + * but it does not hurt to do them over. + */ +void breakpoint(void) +{ + init_entry_mappings(); + set_intr_usr_gate(3,&int3); /* disable ints on trap */ + set_intr_gate(1,&debug); + set_intr_gate(14,&page_fault); + + BREAKPOINT; +} +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ + { \ + if (!user_mode(regs) ) \ + { \ + kgdb_handle_exception(trapnr, signr, error_code, regs); \ + after; \ + } else if ((trapnr == 3) && (regs->eflags &0x200)) local_irq_enable(); \ + } +#else +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) +#endif + + static int kstack_depth_to_print = 24; void show_trace(struct task_struct *task, unsigned long * stack) @@ -119,7 +152,7 @@ void show_trace_task(struct task_struct unsigned long esp = tsk->thread.esp; /* User space on another CPU? */ - if ((esp ^ (unsigned long)tsk->thread_info) & (PAGE_MASK<<1)) + if ((esp ^ (unsigned long)tsk->thread_info) & ~(THREAD_SIZE - 1)) return; show_trace(tsk, (unsigned long *)esp); } @@ -175,8 +208,9 @@ void show_registers(struct pt_regs *regs ss = regs->xss & 0xffff; } print_modules(); - printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx\n", - smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags); + printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx\n", + smp_processor_id(), 0xffff & regs->xcs, + regs->eip, print_tainted(), regs->eflags); print_symbol("EIP is at %s\n", regs->eip); printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", @@ -192,23 +226,27 @@ void show_registers(struct pt_regs *regs * time of the fault.. */ if (in_kernel) { + u8 *eip; printk("\nStack: "); show_stack(NULL, (unsigned long*)esp); printk("Code: "); - if(regs->eip < PAGE_OFFSET) - goto bad; - for(i=0;i<20;i++) - { - unsigned char c; - if(__get_user(c, &((unsigned char*)regs->eip)[i])) { -bad: + eip = (u8 *)regs->eip - 43; + for (i = 0; i < 64; i++, eip++) { + unsigned char c = 0xff; + + if ((user_mode(regs) && get_user(c, eip)) || + (!user_mode(regs) && __direct_get_user(c, eip))) { + printk(" Bad EIP value."); break; } - printk("%02x ", c); + if (eip == (u8 *)regs->eip) + printk("<%02x> ", c); + else + printk("%02x ", c); } } printk("\n"); @@ -255,12 +293,36 @@ spinlock_t die_lock = SPIN_LOCK_UNLOCKED void die(const char * str, struct pt_regs * regs, long err) { static int die_counter; + int nl = 0; console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); handle_BUG(regs); printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); + nl = 1; +#endif +#ifdef CONFIG_SMP + printk("SMP "); + nl = 1; +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); + nl = 1; +#endif + if (nl) + printk("\n"); +#ifdef CONFIG_KGDB + /* This is about the only place we want to go to kgdb even if in + * user mode. But we must go in via a trap so within kgdb we will + * always be in kernel mode. + */ + if (user_mode(regs)) + BREAKPOINT; +#endif + CHK_REMOTE_DEBUG(0,SIGTRAP,err,regs,) show_registers(regs); bust_spinlocks(0); spin_unlock_irq(&die_lock); @@ -330,6 +392,7 @@ static inline void do_trap(int trapnr, i #define DO_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,)\ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } @@ -347,7 +410,9 @@ asmlinkage void do_##name(struct pt_regs #define DO_VM86_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + CHK_REMOTE_DEBUG(trapnr, signr, error_code,regs, return)\ do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ + return; \ } #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ @@ -394,8 +459,10 @@ gp_in_vm86: return; gp_in_kernel: - if (!fixup_exception(regs)) + if (!fixup_exception(regs)){ + CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,) die("general protection fault", regs, error_code); + } } static void mem_parity_error(unsigned char reason, struct pt_regs * regs) @@ -534,10 +601,18 @@ asmlinkage void do_debug(struct pt_regs if (regs->eflags & X86_EFLAGS_IF) local_irq_enable(); - /* Mask out spurious debug traps due to lazy DR7 setting */ + /* + * Mask out spurious debug traps due to lazy DR7 setting or + * due to 4G/4G kernel mode: + */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { if (!tsk->thread.debugreg[7]) goto clear_dr7; + if (!user_mode(regs)) { + // restore upon return-to-userspace: + set_thread_flag(TIF_DB7); + goto clear_dr7; + } } if (regs->eflags & VM_MASK) @@ -557,8 +632,18 @@ asmlinkage void do_debug(struct pt_regs * allowing programs to debug themselves without the ptrace() * interface. */ +#ifdef CONFIG_KGDB + /* + * I think this is the only "real" case of a TF in the kernel + * that really belongs to user space. Others are + * "Ours all ours!" + */ + if (((regs->xcs & 3) == 0) && ((void *)regs->eip == sysenter_entry)) + goto clear_TF_reenable; +#else if ((regs->xcs & 3) == 0) goto clear_TF_reenable; +#endif if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) goto clear_TF; } @@ -570,6 +655,17 @@ asmlinkage void do_debug(struct pt_regs info.si_errno = 0; info.si_code = TRAP_BRKPT; +#ifdef CONFIG_KGDB + /* + * If this is a kernel mode trap, we need to reset db7 to allow us + * to continue sanely ALSO skip the signal delivery + */ + if ((regs->xcs & 3) == 0) + goto clear_dr7; + + /* if not kernel, allow ints but only if they were on */ + if ( regs->eflags & 0x200) local_irq_enable(); +#endif /* If this is a kernel mode trap, save the user PC on entry to * the kernel, that's what the debugger can make sense of. */ @@ -584,6 +680,7 @@ clear_dr7: __asm__("movl %0,%%db7" : /* no output */ : "r" (0)); + CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,) return; debug_vm86: @@ -779,19 +876,53 @@ asmlinkage void math_emulate(long arg) #endif /* CONFIG_MATH_EMULATION */ -#ifdef CONFIG_X86_F00F_BUG -void __init trap_init_f00f_bug(void) +void __init trap_init_virtual_IDT(void) { - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. + * "idt" is magic - it overlaps the idt_descr + * variable so that updating idt will automatically + * update the idt descriptor.. */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); + __set_fixmap(FIX_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + idt_descr.address = __fix_to_virt(FIX_IDT); + __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); } + +void __init trap_init_virtual_GDT(void) +{ + int cpu = smp_processor_id(); + struct Xgt_desc_struct *gdt_desc = cpu_gdt_descr + cpu; + struct Xgt_desc_struct tmp_desc = {0, 0}; + struct tss_struct * t; + + __asm__ __volatile__("sgdt %0": "=m" (tmp_desc): :"memory"); + +#ifdef CONFIG_X86_HIGH_ENTRY + if (!cpu) { + __set_fixmap(FIX_GDT_0, __pa(cpu_gdt_table), PAGE_KERNEL); + __set_fixmap(FIX_GDT_1, __pa(cpu_gdt_table) + PAGE_SIZE, PAGE_KERNEL); + __set_fixmap(FIX_TSS_0, __pa(init_tss), PAGE_KERNEL); + __set_fixmap(FIX_TSS_1, __pa(init_tss) + 1*PAGE_SIZE, PAGE_KERNEL); + __set_fixmap(FIX_TSS_2, __pa(init_tss) + 2*PAGE_SIZE, PAGE_KERNEL); + __set_fixmap(FIX_TSS_3, __pa(init_tss) + 3*PAGE_SIZE, PAGE_KERNEL); + } + + gdt_desc->address = __fix_to_virt(FIX_GDT_0) + sizeof(cpu_gdt_table[0]) * cpu; +#else + gdt_desc->address = (unsigned long)cpu_gdt_table[cpu]; +#endif + __asm__ __volatile__("lgdt %0": "=m" (*gdt_desc)); + +#ifdef CONFIG_X86_HIGH_ENTRY + t = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu; +#else + t = init_tss + cpu; #endif + set_tss_desc(cpu, t); + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; + load_TR_desc(); +} #define _set_gate(gate_addr,type,dpl,addr,seg) \ do { \ @@ -818,20 +949,26 @@ void set_intr_gate(unsigned int n, void _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); } -static void __init set_trap_gate(unsigned int n, void *addr) +void __init set_trap_gate(unsigned int n, void *addr) { _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); } -static void __init set_system_gate(unsigned int n, void *addr) +void __init set_system_gate(unsigned int n, void *addr) { _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); } -static void __init set_call_gate(void *a, void *addr) +void __init set_call_gate(void *a, void *addr) { _set_gate(a,12,3,addr,__KERNEL_CS); } +#ifdef CONFIG_KGDB +void set_intr_usr_gate(unsigned int n, void *addr) +{ + _set_gate(idt_table+n,14,3,addr,__KERNEL_CS); +} +#endif static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) { @@ -850,11 +987,16 @@ void __init trap_init(void) #ifdef CONFIG_X86_LOCAL_APIC init_apic_mappings(); #endif + init_entry_mappings(); set_trap_gate(0,÷_error); set_intr_gate(1,&debug); set_intr_gate(2,&nmi); +#ifndef CONFIG_KGDB set_system_gate(3,&int3); /* int3-5 can be called from all */ +#else + set_intr_usr_gate(3,&int3); /* int3-5 can be called from all */ +#endif set_system_gate(4,&overflow); set_system_gate(5,&bounds); set_trap_gate(6,&invalid_op); --- linux-2.6.3-rc2/arch/i386/kernel/vm86.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/vm86.c 2004-02-12 00:41:23.000000000 -0800 @@ -125,7 +125,7 @@ struct pt_regs * save_v86_state(struct k tss = init_tss + get_cpu(); current->thread.esp0 = current->thread.saved_esp0; current->thread.sysenter_cs = __KERNEL_CS; - load_esp0(tss, ¤t->thread); + load_virtual_esp0(tss, current); current->thread.saved_esp0 = 0; put_cpu(); @@ -305,7 +305,7 @@ static void do_sys_vm86(struct kernel_vm tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; - load_esp0(tss, &tsk->thread); + load_virtual_esp0(tss, tsk); put_cpu(); tsk->thread.screen_bitmap = info->screen_bitmap; --- linux-2.6.3-rc2/arch/i386/kernel/vmlinux.lds.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/vmlinux.lds.S 2004-02-12 00:41:23.000000000 -0800 @@ -3,6 +3,9 @@ */ #include +#include +#include +#include OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -10,7 +13,7 @@ ENTRY(startup_32) jiffies = jiffies_64; SECTIONS { - . = 0xC0000000 + 0x100000; + . = __PAGE_OFFSET + 0x100000; /* read-only */ _text = .; /* Text and read-only data */ .text : { @@ -19,6 +22,19 @@ SECTIONS *(.gnu.warning) } = 0x9090 +#ifdef CONFIG_X86_4G + . = ALIGN(PAGE_SIZE_asm); + __entry_tramp_start = .; + . = FIX_ENTRY_TRAMPOLINE_0_addr; + __start___entry_text = .; + .entry.text : AT (__entry_tramp_start) { *(.entry.text) } + __entry_tramp_end = __entry_tramp_start + SIZEOF(.entry.text); + . = __entry_tramp_end; + . = ALIGN(PAGE_SIZE_asm); +#else + .entry.text : { *(.entry.text) } +#endif + _etext = .; /* End of text section */ . = ALIGN(16); /* Exception table */ @@ -34,15 +50,12 @@ SECTIONS CONSTRUCTORS } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE_asm); __nosave_begin = .; .data_nosave : { *(.data.nosave) } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE_asm); __nosave_end = .; - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - . = ALIGN(32); .data.cacheline_aligned : { *(.data.cacheline_aligned) } @@ -52,7 +65,7 @@ SECTIONS .data.init_task : { *(.data.init_task) } /* will be freed after init */ - . = ALIGN(4096); /* Init code and data */ + . = ALIGN(PAGE_SIZE_asm); /* Init code and data */ __init_begin = .; .init.text : { _sinittext = .; @@ -91,7 +104,7 @@ SECTIONS from .altinstructions and .eh_frame */ .exit.text : { *(.exit.text) } .exit.data : { *(.exit.data) } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE_asm); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; @@ -99,10 +112,22 @@ SECTIONS __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE_asm); __init_end = .; /* freed after init ends here */ - + + . = ALIGN(PAGE_SIZE_asm); + .data.page_aligned_tss : { *(.data.tss) } + + . = ALIGN(PAGE_SIZE_asm); + .data.page_aligned_default_ldt : { *(.data.default_ldt) } + + . = ALIGN(PAGE_SIZE_asm); + .data.page_aligned_idt : { *(.data.idt) } + + . = ALIGN(PAGE_SIZE_asm); + .data.page_aligned_gdt : { *(.data.gdt) } + __bss_start = .; /* BSS */ .bss : { *(.bss) } __bss_stop = .; @@ -122,4 +147,6 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + } --- linux-2.6.3-rc2/arch/i386/kernel/vsyscall.lds 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/vsyscall.lds 2004-02-12 00:41:23.000000000 -0800 @@ -5,7 +5,7 @@ */ /* This must match . */ -VSYSCALL_BASE = 0xffffe000; +VSYSCALL_BASE = 0xffffd000; SECTIONS { --- linux-2.6.3-rc2/arch/i386/kernel/vsyscall-sysenter.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/vsyscall-sysenter.S 2004-02-12 00:41:23.000000000 -0800 @@ -7,6 +7,11 @@ .type __kernel_vsyscall,@function __kernel_vsyscall: .LSTART_vsyscall: + cmpl $192, %eax + jne 1f + int $0x80 + ret +1: push %ecx .Lpush_ecx: push %edx --- linux-2.6.3-rc2/arch/i386/lib/checksum.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/checksum.S 2004-02-12 00:41:23.000000000 -0800 @@ -280,14 +280,14 @@ unsigned int csum_partial_copy_generic ( .previous .align 4 -.globl csum_partial_copy_generic +.globl direct_csum_partial_copy_generic #ifndef CONFIG_X86_USE_PPRO_CHECKSUM #define ARGBASE 16 #define FP 12 -csum_partial_copy_generic: +direct_csum_partial_copy_generic: subl $4,%esp pushl %edi pushl %esi @@ -422,7 +422,7 @@ DST( movb %cl, (%edi) ) #define ARGBASE 12 -csum_partial_copy_generic: +direct_csum_partial_copy_generic: pushl %ebx pushl %edi pushl %esi --- linux-2.6.3-rc2/arch/i386/lib/dec_and_lock.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/dec_and_lock.c 2004-02-12 00:41:21.000000000 -0800 @@ -10,6 +10,7 @@ #include #include +#ifndef ATOMIC_DEC_AND_LOCK int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) { int counter; @@ -38,3 +39,5 @@ slow_path: spin_unlock(lock); return 0; } +#endif + --- linux-2.6.3-rc2/arch/i386/lib/getuser.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/getuser.S 2004-02-12 00:41:23.000000000 -0800 @@ -9,6 +9,7 @@ * return value. */ #include +#include /* @@ -28,7 +29,7 @@ .globl __get_user_1 __get_user_1: GET_THREAD_INFO(%edx) - cmpl TI_ADDR_LIMIT(%edx),%eax + cmpl TI_addr_limit(%edx),%eax jae bad_get_user 1: movzbl (%eax),%edx xorl %eax,%eax @@ -40,7 +41,7 @@ __get_user_2: addl $1,%eax jc bad_get_user GET_THREAD_INFO(%edx) - cmpl TI_ADDR_LIMIT(%edx),%eax + cmpl TI_addr_limit(%edx),%eax jae bad_get_user 2: movzwl -1(%eax),%edx xorl %eax,%eax @@ -52,7 +53,7 @@ __get_user_4: addl $3,%eax jc bad_get_user GET_THREAD_INFO(%edx) - cmpl TI_ADDR_LIMIT(%edx),%eax + cmpl TI_addr_limit(%edx),%eax jae bad_get_user 3: movl -3(%eax),%edx xorl %eax,%eax --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/lib/kgdb_serial.c 2004-02-12 00:39:46.000000000 -0800 @@ -0,0 +1,499 @@ +/* + * Serial interface GDB stub + * + * Written (hacked together) by David Grothe (dave@gcom.com) + * Modified to allow invokation early in boot see also + * kgdb.h for instructions by George Anzinger(george@mvista.com) + * Modified to handle debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_KGDB_USER_CONSOLE +extern void kgdb_console_finit(void); +#endif +#define PRNT_off +#define TEST_EXISTANCE +#ifdef PRNT +#define dbprintk(s) printk s +#else +#define dbprintk(s) +#endif +#define TEST_INTERRUPT_off +#ifdef TEST_INTERRUPT +#define intprintk(s) printk s +#else +#define intprintk(s) +#endif + +#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) + +#define GDB_BUF_SIZE 512 /* power of 2, please */ + +static char gdb_buf[GDB_BUF_SIZE]; +static int gdb_buf_in_inx; +static atomic_t gdb_buf_in_cnt; +static int gdb_buf_out_inx; + +struct async_struct *gdb_async_info; +static int gdb_async_irq; + +#define outb_px(a,b) outb_p(b,a) + +static void program_uart(struct async_struct *info); +static void write_char(struct async_struct *info, int chr); +/* + * Get a byte from the hardware data buffer and return it + */ +static int +read_data_bfr(struct async_struct *info) +{ + char it = inb_p(info->port + UART_LSR); + + if (it & UART_LSR_DR) + return (inb_p(info->port + UART_RX)); + /* + * If we have a framing error assume somebody messed with + * our uart. Reprogram it and send '-' both ways... + */ + if (it & 0xc) { + program_uart(info); + write_char(info, '-'); + return ('-'); + } + return (-1); + +} /* read_data_bfr */ + +/* + * Get a char if available, return -1 if nothing available. + * Empty the receive buffer first, then look at the interface hardware. + + * Locking here is a bit of a problem. We MUST not lock out communication + * if we are trying to talk to gdb about a kgdb entry. ON the other hand + * we can loose chars in the console pass thru if we don't lock. It is also + * possible that we could hold the lock or be waiting for it when kgdb + * NEEDS to talk. Since kgdb locks down the world, it does not need locks. + * We do, of course have possible issues with interrupting a uart operation, + * but we will just depend on the uart status to help keep that straight. + + */ +static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; +#ifdef CONFIG_SMP +extern spinlock_t kgdb_spinlock; +#endif + +static int +read_char(struct async_struct *info) +{ + int chr; + unsigned long flags; + local_irq_save(flags); +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_lock(&uart_interrupt_lock); + } +#endif + if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ + chr = gdb_buf[gdb_buf_out_inx++]; + gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); + atomic_dec(&gdb_buf_in_cnt); + } else { + chr = read_data_bfr(info); + } +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_unlock(&uart_interrupt_lock); + } +#endif + local_irq_restore(flags); + return (chr); +} + +/* + * Wait until the interface can accept a char, then write it. + */ +static void +write_char(struct async_struct *info, int chr) +{ + while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; + + outb_p(chr, info->port + UART_TX); + +} /* write_char */ + +/* + * Mostly we don't need a spinlock, but since the console goes + * thru here with interrutps on, well, we need to catch those + * chars. + */ +/* + * This is the receiver interrupt routine for the GDB stub. + * It will receive a limited number of characters of input + * from the gdb host machine and save them up in a buffer. + * + * When the gdb stub routine tty_getDebugChar() is called it + * draws characters out of the buffer until it is empty and + * then reads directly from the serial port. + * + * We do not attempt to write chars from the interrupt routine + * since the stubs do all of that via tty_putDebugChar() which + * writes one byte after waiting for the interface to become + * ready. + * + * The debug stubs like to run with interrupts disabled since, + * after all, they run as a consequence of a breakpoint in + * the kernel. + * + * Perhaps someone who knows more about the tty driver than I + * care to learn can make this work for any low level serial + * driver. + */ +static irqreturn_t +gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct async_struct *info; + unsigned long flags; + + info = gdb_async_info; + if (!info || !info->tty || irq != gdb_async_irq) + return IRQ_NONE; + + local_irq_save(flags); + spin_lock(&uart_interrupt_lock); + do { + int chr = read_data_bfr(info); + intprintk(("Debug char on int: %x hex\n", chr)); + if (chr < 0) + continue; + + if (chr == 3) { /* Ctrl-C means remote interrupt */ + BREAKPOINT; + continue; + } + + if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { + /* buffer overflow tosses early char */ + read_char(info); + } + gdb_buf[gdb_buf_in_inx++] = chr; + gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); + } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); + spin_unlock(&uart_interrupt_lock); + local_irq_restore(flags); + return IRQ_HANDLED; +} /* gdb_interrupt */ + +/* + * Just a NULL routine for testing. + */ +void +gdb_null(void) +{ +} /* gdb_null */ + +/* These structure are filled in with values defined in asm/kgdb_local.h + */ +static struct serial_state state = SB_STATE; +static struct async_struct local_info = SB_INFO; +static int ok_to_enable_ints = 0; +static void kgdb_enable_ints_now(void); + +extern char *kgdb_version; +/* + * Hook an IRQ for KGDB. + * + * This routine is called from tty_putDebugChar, below. + */ +static int ints_disabled = 1; +int +gdb_hook_interrupt(struct async_struct *info, int verb) +{ + struct serial_state *state = info->state; + unsigned long flags; + int port; +#ifdef TEST_EXISTANCE + int scratch, scratch2; +#endif + + /* The above fails if memory managment is not set up yet. + * Rather than fail the set up, just keep track of the fact + * and pick up the interrupt thing later. + */ + gdb_async_info = info; + port = gdb_async_info->port; + gdb_async_irq = state->irq; + if (verb) { + printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", + kgdb_version, + port, + gdb_async_irq, gdb_async_info->state->custom_divisor); + } + local_irq_save(flags); +#ifdef TEST_EXISTANCE + /* Existance test */ + /* Should not need all this, but just in case.... */ + + scratch = inb_p(port + UART_IER); + outb_px(port + UART_IER, 0); + outb_px(0xff, 0x080); + scratch2 = inb_p(port + UART_IER); + outb_px(port + UART_IER, scratch); + if (scratch2) { + printk + ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); + local_irq_restore(flags); + return 1; /* We failed; there's nothing here */ + } + scratch2 = inb_p(port + UART_LCR); + outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ + outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ + outb_px(port + UART_LCR, 0); + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); + scratch = inb_p(port + UART_IIR) >> 6; + if (scratch == 1) { + printk("gdb_hook_interrupt: Undefined UART type!" + " Not a UART! \n"); + local_irq_restore(flags); + return 1; + } else { + dbprintk(("gdb_hook_interrupt: UART type " + "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); + } + scratch = inb_p(port + UART_MCR); + outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); + outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); + scratch2 = inb_p(port + UART_MSR) & 0xF0; + outb_px(port + UART_MCR, scratch); + if (scratch2 != 0x90) { + printk("gdb_hook_interrupt: " + "Loop back test failed! Not a UART!\n"); + local_irq_restore(flags); + return scratch2 + 1000; /* force 0 to fail */ + } +#endif /* test existance */ + program_uart(info); + local_irq_restore(flags); + + return (0); + +} /* gdb_hook_interrupt */ + +static void +program_uart(struct async_struct *info) +{ + int port = info->port; + + (void) inb_p(port + UART_RX); + outb_px(port + UART_IER, 0); + + (void) inb_p(port + UART_RX); /* serial driver comments say */ + (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ + (void) inb_p(port + UART_MSR); + outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); + outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ + outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ + outb_px(port + UART_MCR, info->MCR); + + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ + outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + return; +} + +/* + * tty_getDebugChar + * + * This is a GDB stub routine. It waits for a character from the + * serial interface and then returns it. If there is no serial + * interface connection then it returns a bogus value which will + * almost certainly cause the system to hang. In the + */ +int kgdb_in_isr = 0; +int kgdb_in_lsr = 0; +extern spinlock_t kgdb_spinlock; + +/* Caller takes needed protections */ + +int +tty_getDebugChar(void) +{ + volatile int chr, dum, time, end_time; + + dbprintk(("tty_getDebugChar(port %x): ", gdb_async_info->port)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + /* + * This trick says if we wait a very long time and get + * no char, return the -1 and let the upper level deal + * with it. + */ + rdtsc(dum, time); + end_time = time + 2; + while (((chr = read_char(gdb_async_info)) == -1) && + (end_time - time) > 0) { + rdtsc(dum, time); + }; + /* + * This covers our butts if some other code messes with + * our uart, hay, it happens :o) + */ + if (chr == -1) + program_uart(gdb_async_info); + + dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); + return (chr); + +} /* tty_getDebugChar */ + +static int count = 3; +static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; + +static int __init +kgdb_enable_ints(void) +{ + if (kgdboe) { + return 0; + } + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 1); + } + ok_to_enable_ints = 1; + kgdb_enable_ints_now(); +#ifdef CONFIG_KGDB_USER_CONSOLE + kgdb_console_finit(); +#endif + return 0; +} + +#ifdef CONFIG_SERIAL_8250 +void shutdown_for_kgdb(struct async_struct *gdb_async_info); +#endif + +#ifdef CONFIG_DISCONTIGMEM +static inline int kgdb_mem_init_done(void) +{ + return highmem_start_page != NULL; +} +#else +static inline int kgdb_mem_init_done(void) +{ + return max_mapnr != 0; +} +#endif + +static void +kgdb_enable_ints_now(void) +{ + if (!spin_trylock(&one_at_atime)) + return; + if (!ints_disabled) + goto exit; + if (kgdb_mem_init_done() && + ints_disabled) { /* don't try till mem init */ +#ifdef CONFIG_SERIAL_8250 + /* + * The ifdef here allows the system to be configured + * without the serial driver. + * Don't make it a module, however, it will steal the port + */ + shutdown_for_kgdb(gdb_async_info); +#endif + ints_disabled = request_irq(gdb_async_info->state->irq, + gdb_interrupt, + IRQ_T(gdb_async_info), + "KGDB-stub", NULL); + intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); + } + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + exit: + spin_unlock(&one_at_atime); +} + +/* + * tty_putDebugChar + * + * This is a GDB stub routine. It waits until the interface is ready + * to transmit a char and then sends it. If there is no serial + * interface connection then it simply returns to its caller, having + * pretended to send the char. Caller takes needed protections. + */ +void +tty_putDebugChar(int chr) +{ + dbprintk(("tty_putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", + gdb_async_info->port, + chr, + chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + + write_char(gdb_async_info, chr); /* this routine will wait */ + count = (chr == '#') ? 0 : count + 1; + if ((count == 2)) { /* try to enable after */ + if (ints_disabled & ok_to_enable_ints) + kgdb_enable_ints_now(); /* try to enable after */ + + /* We do this a lot because, well we really want to get these + * interrupts. The serial driver will clear these bits when it + * initializes the chip. Every thing else it does is ok, + * but this. + */ + if (!ints_disabled) { + outb_px(gdb_async_info->port + UART_IER, + gdb_async_info->IER); + } + } + +} /* tty_putDebugChar */ + +/* + * This does nothing for the serial port, since it doesn't buffer. + */ + +void tty_flushDebugChar(void) +{ +} + +module_init(kgdb_enable_ints); --- linux-2.6.3-rc2/arch/i386/lib/Makefile 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/Makefile 2004-02-12 00:39:45.000000000 -0800 @@ -9,4 +9,5 @@ lib-y = checksum.o delay.o \ lib-$(CONFIG_X86_USE_3DNOW) += mmx.o lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +lib-$(CONFIG_KGDB) += kgdb_serial.o lib-$(CONFIG_DEBUG_IOVIRT) += iodebug.o --- linux-2.6.3-rc2/arch/i386/lib/mmx.c 2003-06-14 12:18:21.000000000 -0700 +++ 25/arch/i386/lib/mmx.c 2004-02-12 00:40:19.000000000 -0800 @@ -121,7 +121,7 @@ void *_mmx_memcpy(void *to, const void * return p; } -#ifdef CONFIG_MK7 +#ifdef CONFIG_CPU_ONLY_K7 /* * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and --- linux-2.6.3-rc2/arch/i386/lib/usercopy.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/lib/usercopy.c 2004-02-12 00:41:23.000000000 -0800 @@ -76,7 +76,7 @@ do { \ * and returns @count. */ long -__strncpy_from_user(char *dst, const char __user *src, long count) +__direct_strncpy_from_user(char *dst, const char __user *src, long count) { long res; __do_strncpy_from_user(dst, src, count, res); @@ -102,7 +102,7 @@ __strncpy_from_user(char *dst, const cha * and returns @count. */ long -strncpy_from_user(char *dst, const char __user *src, long count) +direct_strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; if (access_ok(VERIFY_READ, src, 1)) @@ -147,7 +147,7 @@ do { \ * On success, this will be zero. */ unsigned long -clear_user(void __user *to, unsigned long n) +direct_clear_user(void __user *to, unsigned long n) { might_sleep(); if (access_ok(VERIFY_WRITE, to, n)) @@ -167,7 +167,7 @@ clear_user(void __user *to, unsigned lon * On success, this will be zero. */ unsigned long -__clear_user(void __user *to, unsigned long n) +__direct_clear_user(void __user *to, unsigned long n) { __do_clear_user(to, n); return n; @@ -184,7 +184,7 @@ __clear_user(void __user *to, unsigned l * On exception, returns 0. * If the string is too long, returns a value greater than @n. */ -long strnlen_user(const char __user *s, long n) +long direct_strnlen_user(const char __user *s, long n) { unsigned long mask = -__addr_ok(s); unsigned long res, tmp; @@ -575,3 +575,4 @@ unsigned long __copy_from_user_ll(void * n = __copy_user_zeroing_intel(to, (const void *) from, n); return n; } + --- linux-2.6.3-rc2/arch/i386/mach-voyager/voyager_smp.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/mach-voyager/voyager_smp.c 2004-02-12 00:40:19.000000000 -0800 @@ -553,7 +553,7 @@ do_boot_cpu(__u8 cpu) /* For the 486, we can't use the 4Mb page table trick, so * must map a region of memory */ -#ifdef CONFIG_M486 +#ifdef CONFIG_CPU_486 int i; unsigned long *page_table_copies = (unsigned long *) __get_free_page(GFP_KERNEL); @@ -612,7 +612,7 @@ do_boot_cpu(__u8 cpu) /* set the original swapper_pg_dir[0] to map 0 to 4Mb transparently * (so that the booting CPU can find start_32 */ orig_swapper_pg_dir0 = swapper_pg_dir[0]; -#ifdef CONFIG_M486 +#ifdef CONFIG_CPU_486 if(page_table_copies == NULL) panic("No free memory for 486 page tables\n"); for(i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++) @@ -669,7 +669,7 @@ do_boot_cpu(__u8 cpu) /* reset the page table */ swapper_pg_dir[0] = orig_swapper_pg_dir0; local_flush_tlb(); -#ifdef CONFIG_M486 +#ifdef CONFIG_CPU_486 free_page((unsigned long)page_table_copies); #endif --- linux-2.6.3-rc2/arch/i386/Makefile 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/Makefile 2004-02-12 00:40:32.000000000 -0800 @@ -19,33 +19,63 @@ LDFLAGS := -m elf_i386 OBJCOPYFLAGS := -O binary -R .note -R .comment -S LDFLAGS_vmlinux := -CFLAGS += -pipe +CFLAGS += -pipe -msoft-float # prevent gcc from keeping the stack 16 byte aligned CFLAGS += $(call check_gcc,-mpreferred-stack-boundary=2,) align := $(subst -functions=0,,$(call check_gcc,-falign-functions=0,-malign-functions=0)) -cflags-$(CONFIG_M386) += -march=i386 -cflags-$(CONFIG_M486) += -march=i486 -cflags-$(CONFIG_M586) += -march=i586 -cflags-$(CONFIG_M586TSC) += -march=i586 -cflags-$(CONFIG_M586MMX) += $(call check_gcc,-march=pentium-mmx,-march=i586) -cflags-$(CONFIG_M686) += -march=i686 -cflags-$(CONFIG_MPENTIUMII) += $(call check_gcc,-march=pentium2,-march=i686) -cflags-$(CONFIG_MPENTIUMIII) += $(call check_gcc,-march=pentium3,-march=i686) -cflags-$(CONFIG_MPENTIUM4) += $(call check_gcc,-march=pentium4,-march=i686) -cflags-$(CONFIG_MK6) += $(call check_gcc,-march=k6,-march=i586) +cflags-$(CONFIG_CPU_PENTIUM4) := $(call check_gcc,-march=pentium4,-march=i686) + +ifdef CONFIG_CPU_PENTIUM4 + cflags-$(CONFIG_CPU_K8) := $(call check_gcc,-march=pentium3,-march=i686) +else + cflags-$(CONFIG_CPU_K8) := $(call check_gcc,-march=k8,$(call check_gcc,-march=athlon,-march=i686 $(align)-functions=4)) +endif + # Please note, that patches that add -march=athlon-xp and friends are pointless. # They make zero difference whatsosever to performance at this time. -cflags-$(CONFIG_MK7) += $(call check_gcc,-march=athlon,-march=i686 $(align)-functions=4) -cflags-$(CONFIG_MK8) += $(call check_gcc,-march=k8,$(call check_gcc,-march=athlon,-march=i686 $(align)-functions=4)) -cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 -cflags-$(CONFIG_MWINCHIPC6) += $(call check_gcc,-march=winchip-c6,-march=i586) -cflags-$(CONFIG_MWINCHIP2) += $(call check_gcc,-march=winchip2,-march=i586) -cflags-$(CONFIG_MWINCHIP3D) += $(call check_gcc,-march=winchip2,-march=i586) -cflags-$(CONFIG_MCYRIXIII) += $(call check_gcc,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 -cflags-$(CONFIG_MVIAC3_2) += $(call check_gcc,-march=c3-2,-march=i686) +ifdef CONFIG_CPU_PENTIUM4 + cflags-$(CONFIG_CPU_K7) := $(call check_gcc,-march=pentium3,-march=i686) +else + cflags-$(CONFIG_CPU_K7) := $(call check_gcc,-march=athlon,-march=i686 $(align)-functions=4) +endif + +cflags-$(CONFIG_CPU_PENTIUMM) := $(call check_gcc,-march=pentium3,-march=i686) +cflags-$(CONFIG_CPU_PENTIUMIII) := $(call check_gcc,-march=pentium3,-march=i686) +cflags-$(CONFIG_CPU_PENTIUMII) := $(call check_gcc,-march=pentium2,-march=i686) +cflags-$(CONFIG_CPU_VIAC3_2) := $(call check_gcc,-march=c3-2,-march=i686) +cflags-$(CONFIG_CPU_CRUSOE) := -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 +cflags-$(CONFIG_CPU_686) := -march=i686 + +# supports i686 without cmov +cflags-$(CONFIG_CPU_CYRIXIII) := $(call check_gcc,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 + +cflags-$(CONFIG_CPU_K6) := -march=k6 +cflags-$(CONFIG_CPU_586MMX) := $(call check_gcc,-march=pentium-mmx,-march=i586) + +# Winchip supports i586 +cflags-$(CONFIG_CPU_WINCHIPC6) := $(call check_gcc,-march=winchip-c6,-march=i486) +cflags-$(CONFIG_CPU_WINCHIP2) := $(call check_gcc,-march=winchip2,-march=i486) +cflags-$(CONFIG_CPU_WINCHIP3D) := $(call check_gcc,-march=winchip2,-march=i486) + +cflags-$(CONFIG_CPU_586TSC) := -march=i586 +cflags-$(CONFIG_CPU_586) := -march=i586 +cflags-$(CONFIG_CPU_486) := -march=i486 +cflags-$(CONFIG_CPU_386) := -march=i386 + +# AMD Elan support +cflags-$(CONFIG_X86_ELAN) := -march=i486 + +# -mregparm=3 works ok on gcc-3.0 and later +# +GCC_VERSION := $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC)) +cflags-$(CONFIG_REGPARM) += $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;) + +# Enable unit-at-a-time mode when possible. It shrinks the +# kernel considerably. +CFLAGS += $(call check_gcc,-funit-at-a-time,) CFLAGS += $(cflags-y) @@ -84,6 +114,9 @@ mcore-$(CONFIG_X86_ES7000) := mach-es700 # default subarch .h files mflags-y += -Iinclude/asm-i386/mach-default +mflags-$(CONFIG_KGDB) += -gdwarf-2 +mflags-$(CONFIG_KGDB_MORE) += $(shell echo $(CONFIG_KGDB_OPTIONS) | sed -e 's/"//g') + head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o libs-y += arch/i386/lib/ --- linux-2.6.3-rc2/arch/i386/math-emu/fpu_system.h 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/math-emu/fpu_system.h 2004-02-12 00:41:23.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include /* This sets the pointer FPU_info to point to the argument part of the stack frame of math_emulate() */ @@ -22,7 +23,7 @@ /* s is always from a cpu register, and the cpu does bounds checking * during register load --> no further bounds checks needed */ -#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) +#define LDT_DESCRIPTOR(s) (((struct desc_struct *)__kmap_atomic_vaddr(KM_LDT_PAGE0))[(s) >> 3]) #define SEG_D_SIZE(x) ((x).b & (3 << 21)) #define SEG_G_BIT(x) ((x).b & (1 << 23)) #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) --- linux-2.6.3-rc2/arch/i386/mm/fault.c 2003-12-17 21:20:01.000000000 -0800 +++ 25/arch/i386/mm/fault.c 2004-02-12 00:41:23.000000000 -0800 @@ -27,6 +27,7 @@ #include #include #include +#include extern void die(const char *,struct pt_regs *,long); @@ -104,8 +105,17 @@ static inline unsigned long get_segment_ if (seg & (1<<2)) { /* Must lock the LDT while reading it. */ down(¤t->mm->context.sem); +#if 1 + /* horrible hack for 4/4 disabled kernels. + I'm not quite sure what the TLB flush is good for, + it's mindlessly copied from the read_ldt code */ + __flush_tlb_global(); + desc = kmap(current->mm->context.ldt_pages[(seg&~7)/PAGE_SIZE]); + desc = (void *)desc + ((seg & ~7) % PAGE_SIZE); +#else desc = current->mm->context.ldt; desc = (void *)desc + (seg & ~7); +#endif } else { /* Must disable preemption while reading the GDT. */ desc = (u32 *)&cpu_gdt_table[get_cpu()]; @@ -118,6 +128,9 @@ static inline unsigned long get_segment_ (desc[1] & 0xff000000); if (seg & (1<<2)) { +#if 1 + kunmap((void *)((unsigned long)desc & PAGE_MASK)); +#endif up(¤t->mm->context.sem); } else put_cpu(); @@ -243,6 +256,19 @@ asmlinkage void do_page_fault(struct pt_ * (error_code & 4) == 0, and that the fault was not a * protection error (error_code & 1) == 0. */ +#ifdef CONFIG_X86_4G + /* + * On 4/4 all kernels faults are either bugs, vmalloc or prefetch + */ + if (unlikely((regs->xcs & 3) == 0)) { + if (error_code & 3) + goto bad_area_nosemaphore; + + /* If it's vm86 fall through */ + if (!(regs->eflags & VM_MASK)) + goto vmalloc_fault; + } +#else if (unlikely(address >= TASK_SIZE)) { if (!(error_code & 5)) goto vmalloc_fault; @@ -252,6 +278,7 @@ asmlinkage void do_page_fault(struct pt_ */ goto bad_area_nosemaphore; } +#endif mm = tsk->mm; @@ -403,6 +430,12 @@ no_context: * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ +#ifdef CONFIG_KGDB + if (!user_mode(regs)){ + kgdb_handle_exception(14,SIGBUS, error_code, regs); + return; + } +#endif bust_spinlocks(1); --- linux-2.6.3-rc2/arch/i386/mm/init.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/mm/init.c 2004-02-12 00:41:24.000000000 -0800 @@ -40,125 +40,13 @@ #include #include #include +#include DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; static int do_test_wp_bit(void); -/* - * Creates a middle page table and puts a pointer to it in the - * given global directory entry. This only returns the gd entry - * in non-PAE compilation mode, since the middle layer is folded. - */ -static pmd_t * __init one_md_table_init(pgd_t *pgd) -{ - pmd_t *pmd_table; - -#ifdef CONFIG_X86_PAE - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - if (pmd_table != pmd_offset(pgd, 0)) - BUG(); -#else - pmd_table = pmd_offset(pgd, 0); -#endif - - return pmd_table; -} - -/* - * Create a page table and place a pointer to it in a middle page - * directory entry. - */ -static pte_t * __init one_page_table_init(pmd_t *pmd) -{ - if (pmd_none(*pmd)) { - pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); - if (page_table != pte_offset_kernel(pmd, 0)) - BUG(); - - return page_table; - } - - return pte_offset_kernel(pmd, 0); -} - -/* - * This function initializes a certain range of kernel virtual memory - * with new bootmem page tables, everywhere page tables are missing in - * the given range. - */ - -/* - * NOTE: The pagetables are allocated contiguous on the physical space - * so we can cache the place of the first one and move around without - * checking the pgd every time. - */ -static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) -{ - pgd_t *pgd; - pmd_t *pmd; - int pgd_idx, pmd_idx; - unsigned long vaddr; - - vaddr = start; - pgd_idx = pgd_index(vaddr); - pmd_idx = pmd_index(vaddr); - pgd = pgd_base + pgd_idx; - - for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - if (pgd_none(*pgd)) - one_md_table_init(pgd); - - pmd = pmd_offset(pgd, vaddr); - for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { - if (pmd_none(*pmd)) - one_page_table_init(pmd); - - vaddr += PMD_SIZE; - } - pmd_idx = 0; - } -} - -/* - * This maps the physical memory to kernel virtual address space, a total - * of max_low_pfn pages, by creating page tables starting from address - * PAGE_OFFSET. - */ -static void __init kernel_physical_mapping_init(pgd_t *pgd_base) -{ - unsigned long pfn; - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int pgd_idx, pmd_idx, pte_ofs; - - pgd_idx = pgd_index(PAGE_OFFSET); - pgd = pgd_base + pgd_idx; - pfn = 0; - - for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { - pmd = one_md_table_init(pgd); - if (pfn >= max_low_pfn) - continue; - for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { - /* Map with big pages if possible, otherwise create normal page tables. */ - if (cpu_has_pse) { - set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); - pfn += PTRS_PER_PTE; - } else { - pte = one_page_table_init(pmd); - - for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); - } - } - } -} - static inline int page_kills_ppro(unsigned long pagenr) { if (pagenr >= 0x70000 && pagenr <= 0x7003F) @@ -206,11 +94,8 @@ static inline int page_is_ram(unsigned l return 0; } -#ifdef CONFIG_HIGHMEM pte_t *kmap_pte; -pgprot_t kmap_prot; -EXPORT_SYMBOL(kmap_prot); EXPORT_SYMBOL(kmap_pte); #define kmap_get_fixmap_pte(vaddr) \ @@ -218,29 +103,7 @@ EXPORT_SYMBOL(kmap_pte); void __init kmap_init(void) { - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; -} - -void __init permanent_kmaps_init(pgd_t *pgd_base) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - unsigned long vaddr; - - vaddr = PKMAP_BASE; - page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - - pgd = swapper_pg_dir + pgd_index(vaddr); - pmd = pmd_offset(pgd, vaddr); - pte = pte_offset_kernel(pmd, vaddr); - pkmap_page_table = pte; + kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN)); } void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) @@ -255,6 +118,8 @@ void __init one_highpage_init(struct pag SetPageReserved(page); } +#ifdef CONFIG_HIGHMEM + #ifndef CONFIG_DISCONTIGMEM void __init set_highmem_pages_init(int bad_ppro) { @@ -266,12 +131,9 @@ void __init set_highmem_pages_init(int b #else extern void set_highmem_pages_init(int); #endif /* !CONFIG_DISCONTIGMEM */ - #else -#define kmap_init() do { } while (0) -#define permanent_kmaps_init(pgd_base) do { } while (0) -#define set_highmem_pages_init(bad_ppro) do { } while (0) -#endif /* CONFIG_HIGHMEM */ +# define set_highmem_pages_init(bad_ppro) do { } while (0) +#endif unsigned long __PAGE_KERNEL = _PAGE_KERNEL; @@ -281,30 +143,125 @@ unsigned long __PAGE_KERNEL = _PAGE_KERN extern void __init remap_numa_kva(void); #endif -static void __init pagetable_init (void) +static __init void prepare_pagetables(pgd_t *pgd_base, unsigned long address) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_base + pgd_index(address); + pmd = pmd_offset(pgd, address); + if (!pmd_present(*pmd)) { + pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); + } +} + +static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base) { unsigned long vaddr; - pgd_t *pgd_base = swapper_pg_dir; + for (vaddr = start; vaddr != end; vaddr += PAGE_SIZE) + prepare_pagetables(pgd_base, vaddr); +} + +void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end) +{ + unsigned long vaddr; + pgd_t *pgd; + int i, j, k; + pmd_t *pmd; + pte_t *pte, *pte_base; + + pgd = pgd_base; + + for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { + vaddr = i*PGDIR_SIZE; + if (end && (vaddr >= end)) + break; + pmd = pmd_offset(pgd, 0); + for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { + vaddr = i*PGDIR_SIZE + j*PMD_SIZE; + if (end && (vaddr >= end)) + break; + if (vaddr < start) + continue; + if (cpu_has_pse) { + unsigned long __pe; + + set_in_cr4(X86_CR4_PSE); + boot_cpu_data.wp_works_ok = 1; + __pe = _KERNPG_TABLE + _PAGE_PSE + vaddr - start; + /* Make it "global" too if supported */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); +#if !defined(CONFIG_X86_SWITCH_PAGETABLES) + __pe += _PAGE_GLOBAL; + __PAGE_KERNEL |= _PAGE_GLOBAL; +#endif + } + set_pmd(pmd, __pmd(__pe)); + continue; + } + if (!pmd_present(*pmd)) + pte_base = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + else + pte_base = (pte_t *) page_address(pmd_page(*pmd)); + pte = pte_base; + for (k = 0; k < PTRS_PER_PTE; pte++, k++) { + vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; + if (end && (vaddr >= end)) + break; + if (vaddr < start) + continue; + *pte = mk_pte_phys(vaddr-start, PAGE_KERNEL); + } + set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); + } + } +} + +static void __init pagetable_init (void) +{ + unsigned long vaddr, end; + pgd_t *pgd_base; #ifdef CONFIG_X86_PAE int i; - /* Init entries of the first-level page table to the zero page */ - for (i = 0; i < PTRS_PER_PGD; i++) - set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); #endif - /* Enable PSE if available */ - if (cpu_has_pse) { - set_in_cr4(X86_CR4_PSE); - } + /* + * This can be zero as well - no problem, in that case we exit + * the loops anyway due to the PTRS_PER_* conditions. + */ + end = (unsigned long)__va(max_low_pfn*PAGE_SIZE); - /* Enable PGE if available */ - if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); - __PAGE_KERNEL |= _PAGE_GLOBAL; + pgd_base = swapper_pg_dir; +#ifdef CONFIG_X86_PAE + /* + * It causes too many problems if there's no proper pmd set up + * for all 4 entries of the PGD - so we allocate all of them. + * PAE systems will not miss this extra 4-8K anyway ... + */ + for (i = 0; i < PTRS_PER_PGD; i++) { + pmd_t *pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pgd(pgd_base + i, __pgd(__pa(pmd) + 0x1)); } +#endif + /* + * Set up lowmem-sized identity mappings at PAGE_OFFSET: + */ + setup_identity_mappings(pgd_base, PAGE_OFFSET, end); - kernel_physical_mapping_init(pgd_base); + /* + * Add flat-mode identity-mappings - SMP needs it when + * starting up on an AP from real-mode. (In the non-PAE + * case we already have these mappings through head.S.) + * All user-space mappings are explicitly cleared after + * SMP startup. + */ +#if defined(CONFIG_SMP) && defined(CONFIG_X86_PAE) + setup_identity_mappings(pgd_base, 0, 16*1024*1024); +#endif remap_numa_kva(); /* @@ -312,38 +269,64 @@ static void __init pagetable_init (void) * created - mappings will be set by set_fixmap(): */ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - page_table_range_init(vaddr, 0, pgd_base); + fixrange_init(vaddr, 0, pgd_base); - permanent_kmaps_init(pgd_base); +#ifdef CONFIG_HIGHMEM + { + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; -#ifdef CONFIG_X86_PAE - /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. - */ - pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; + /* + * Permanent kmaps: + */ + vaddr = PKMAP_BASE; + fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + + pgd = swapper_pg_dir + pgd_index(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + pkmap_page_table = pte; + } #endif } -void zap_low_mappings (void) +/* + * Clear kernel pagetables in a PMD_SIZE-aligned range. + */ +static void clear_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end) { - int i; + unsigned long vaddr; + pgd_t *pgd; + pmd_t *pmd; + int i, j; + + pgd = pgd_base; + + for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { + vaddr = i*PGDIR_SIZE; + if (end && (vaddr >= end)) + break; + pmd = pmd_offset(pgd, 0); + for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { + vaddr = i*PGDIR_SIZE + j*PMD_SIZE; + if (end && (vaddr >= end)) + break; + if (vaddr < start) + continue; + pmd_clear(pmd); + } + } + flush_tlb_all(); +} + +void zap_low_mappings(void) +{ + printk("zapping low mappings.\n"); /* * Zap initial low-memory mappings. - * - * Note that "pgd_clear()" doesn't do it for - * us, because pgd_clear() is a no-op on i386. */ - for (i = 0; i < USER_PTRS_PER_PGD; i++) -#ifdef CONFIG_X86_PAE - set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); -#else - set_pgd(swapper_pg_dir+i, __pgd(0)); -#endif - flush_tlb_all(); + clear_mappings(swapper_pg_dir, 0, 16*1024*1024); } #ifndef CONFIG_DISCONTIGMEM @@ -393,7 +376,15 @@ void __init paging_init(void) set_in_cr4(X86_CR4_PAE); #endif __flush_tlb_all(); - + /* + * Subtle. SMP is doing it's boot stuff late (because it has to + * fork idle threads) - but it also needs low mappings for the + * protected-mode entry to work. We zap these entries only after + * the WP-bit has been tested. + */ +#ifndef CONFIG_SMP + zap_low_mappings(); +#endif kmap_init(); zone_sizes_init(); } @@ -515,22 +506,18 @@ void __init mem_init(void) if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); - /* - * Subtle. SMP is doing it's boot stuff late (because it has to - * fork idle threads) - but it also needs low mappings for the - * protected-mode entry to work. We zap these entries only after - * the WP-bit has been tested. - */ -#ifndef CONFIG_SMP - zap_low_mappings(); -#endif + entry_trampoline_setup(); + default_ldt_page = virt_to_page(default_ldt); + load_LDT(&init_mm.context); } -kmem_cache_t *pgd_cache; -kmem_cache_t *pmd_cache; +kmem_cache_t *pgd_cache, *pmd_cache, *kpmd_cache; void __init pgtable_cache_init(void) { + void (*ctor)(void *, kmem_cache_t *, unsigned long); + void (*dtor)(void *, kmem_cache_t *, unsigned long); + if (PTRS_PER_PMD > 1) { pmd_cache = kmem_cache_create("pmd", PTRS_PER_PMD*sizeof(pmd_t), @@ -540,13 +527,36 @@ void __init pgtable_cache_init(void) NULL); if (!pmd_cache) panic("pgtable_cache_init(): cannot create pmd cache"); + + if (TASK_SIZE > PAGE_OFFSET) { + kpmd_cache = kmem_cache_create("kpmd", + PTRS_PER_PMD*sizeof(pmd_t), + 0, + SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, + kpmd_ctor, + NULL); + if (!kpmd_cache) + panic("pgtable_cache_init(): " + "cannot create kpmd cache"); + } } + + if (PTRS_PER_PMD == 1 || TASK_SIZE <= PAGE_OFFSET) + ctor = pgd_ctor; + else + ctor = NULL; + + if (PTRS_PER_PMD == 1 && TASK_SIZE <= PAGE_OFFSET) + dtor = pgd_dtor; + else + dtor = NULL; + pgd_cache = kmem_cache_create("pgd", PTRS_PER_PGD*sizeof(pgd_t), 0, SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, - pgd_ctor, - PTRS_PER_PMD == 1 ? pgd_dtor : NULL); + ctor, + dtor); if (!pgd_cache) panic("pgtable_cache_init(): Cannot create pgd cache"); } --- linux-2.6.3-rc2/arch/i386/mm/pgtable.c 2003-11-09 16:45:05.000000000 -0800 +++ 25/arch/i386/mm/pgtable.c 2004-02-12 00:41:23.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include void show_mem(void) { @@ -157,11 +158,20 @@ void pmd_ctor(void *pmd, kmem_cache_t *c memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); } +void kpmd_ctor(void *__pmd, kmem_cache_t *cache, unsigned long flags) +{ + pmd_t *kpmd, *pmd; + kpmd = pmd_offset(&swapper_pg_dir[PTRS_PER_PGD-1], + (PTRS_PER_PMD - NR_SHARED_PMDS)*PMD_SIZE); + pmd = (pmd_t *)__pmd + (PTRS_PER_PMD - NR_SHARED_PMDS); + + memset(__pmd, 0, (PTRS_PER_PMD - NR_SHARED_PMDS)*sizeof(pmd_t)); + memcpy(pmd, kpmd, NR_SHARED_PMDS*sizeof(pmd_t)); +} + /* - * List of all pgd's needed for non-PAE so it can invalidate entries - * in both cached and uncached pgd's; not needed for PAE since the - * kernel pmd is shared. If PAE were not to share the pmd a similar - * tactic would be needed. This is essentially codepath-based locking + * List of all pgd's needed so it can invalidate entries in both cached + * and uncached pgd's. This is essentially codepath-based locking * against pageattr.c; it is the unique case in which a valid change * of kernel pagetables can't be lazily synchronized by vmalloc faults. * vmalloc faults work because attached pagetables are never freed. @@ -170,30 +180,60 @@ void pmd_ctor(void *pmd, kmem_cache_t *c * could be used. The locking scheme was chosen on the basis of * manfred's recommendations and having no core impact whatsoever. * -- wli + * + * The entire issue goes away when XKVA is configured. */ spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED; LIST_HEAD(pgd_list); -void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) +/* + * This is not that hard to figure out. + * (a) PTRS_PER_PMD == 1 means non-PAE. + * (b) PTRS_PER_PMD > 1 means PAE. + * (c) TASK_SIZE > PAGE_OFFSET means XKVA. + * (d) TASK_SIZE <= PAGE_OFFSET means non-XKVA. + * + * Do *NOT* back out the preconstruction like the patch I'm cleaning + * up after this very instant did, or at all, for that matter. + * This is never called when PTRS_PER_PMD > 1 && TASK_SIZE > PAGE_OFFSET. + * -- wli + */ +void pgd_ctor(void *__pgd, kmem_cache_t *cache, unsigned long unused) { + pgd_t *pgd = (pgd_t *)__pgd; unsigned long flags; - if (PTRS_PER_PMD == 1) - spin_lock_irqsave(&pgd_lock, flags); + if (PTRS_PER_PMD == 1) { + if (TASK_SIZE <= PAGE_OFFSET) + spin_lock_irqsave(&pgd_lock, flags); + else + memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS], + &swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS], + NR_SHARED_PMDS * sizeof(pgd_t)); + } - memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + if (TASK_SIZE <= PAGE_OFFSET) + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); if (PTRS_PER_PMD > 1) return; - list_add(&virt_to_page(pgd)->lru, &pgd_list); - spin_unlock_irqrestore(&pgd_lock, flags); - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); + if (TASK_SIZE > PAGE_OFFSET) + memset(pgd, 0, (PTRS_PER_PGD - NR_SHARED_PMDS)*sizeof(pgd_t)); + else { + list_add(&virt_to_page(pgd)->lru, &pgd_list); + spin_unlock_irqrestore(&pgd_lock, flags); + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); + } } -/* never called when PTRS_PER_PMD > 1 */ +/* + * Never called when PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET + * for with PAE we would list_del() multiple times, and for non-PAE + * with XKVA all the AGP pgd shootdown code is unnecessary. + */ void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) { unsigned long flags; /* can be called from interrupt context */ @@ -203,6 +243,12 @@ void pgd_dtor(void *pgd, kmem_cache_t *c spin_unlock_irqrestore(&pgd_lock, flags); } +/* + * See the comments above pgd_ctor() wrt. preconstruction. + * Do *NOT* memcpy() here. If you do, you back out important + * anti- cache pollution code. + * + */ pgd_t *pgd_alloc(struct mm_struct *mm) { int i; @@ -211,15 +257,33 @@ pgd_t *pgd_alloc(struct mm_struct *mm) if (PTRS_PER_PMD == 1 || !pgd) return pgd; + /* + * In the 4G userspace case alias the top 16 MB virtual + * memory range into the user mappings as well (these + * include the trampoline and CPU data structures). + */ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { - pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); + kmem_cache_t *cache; + pmd_t *pmd; + + if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1) + cache = kpmd_cache; + else + cache = pmd_cache; + + pmd = kmem_cache_alloc(cache, GFP_KERNEL); if (!pmd) goto out_oom; set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd)))); } - return pgd; + return pgd; out_oom: + /* + * we don't have to handle the kpmd_cache here, since it's the + * last allocation, and has either nothing to free or when it + * succeeds the whole operation succeeds. + */ for (i--; i >= 0; i--) kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); kmem_cache_free(pgd_cache, pgd); @@ -230,10 +294,29 @@ void pgd_free(pgd_t *pgd) { int i; - /* in the PAE case user pgd entries are overwritten before usage */ - if (PTRS_PER_PMD > 1) - for (i = 0; i < USER_PTRS_PER_PGD; ++i) - kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); /* in the non-PAE case, clear_page_tables() clears user pgd entries */ + if (PTRS_PER_PMD == 1) + goto out_free; + + /* in the PAE case user pgd entries are overwritten before usage */ + for (i = 0; i < USER_PTRS_PER_PGD; ++i) { + kmem_cache_t *cache; + pmd_t *pmd = __va(pgd_val(pgd[i]) - 1); + + /* + * only userspace pmd's are cleared for us + * by mm/memory.c; it's a slab cache invariant + * that we must separate the kernel pmd slab + * all times, else we'll have bad pmd's. + */ + if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1) + cache = kpmd_cache; + else + cache = pmd_cache; + + kmem_cache_free(cache, pmd); + } +out_free: kmem_cache_free(pgd_cache, pgd); } + --- linux-2.6.3-rc2/arch/i386/oprofile/nmi_timer_int.c 2003-06-22 12:04:43.000000000 -0700 +++ 25/arch/i386/oprofile/nmi_timer_int.c 2004-02-12 00:41:17.000000000 -0800 @@ -48,9 +48,13 @@ static struct oprofile_operations nmi_ti .cpu_type = "timer" }; - int __init nmi_timer_init(struct oprofile_operations ** ops) { + extern int nmi_active; + + if (nmi_active <= 0) + return -ENODEV; + *ops = &nmi_timer_ops; printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); return 0; --- linux-2.6.3-rc2/arch/i386/oprofile/op_model_p4.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/i386/oprofile/op_model_p4.c 2004-02-12 00:40:06.000000000 -0800 @@ -382,11 +382,8 @@ static struct p4_event_binding p4_events static unsigned int get_stagger(void) { #ifdef CONFIG_SMP - int cpu; - if (smp_num_siblings > 1) { - cpu = smp_processor_id(); - return (cpu_sibling_map[cpu] > cpu) ? 0 : 1; - } + int cpu = smp_processor_id(); + return (cpu != first_cpu(cpu_sibling_map[cpu])); #endif return 0; } --- linux-2.6.3-rc2/arch/ia64/defconfig 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ia64/defconfig 2004-02-12 00:39:25.000000000 -0800 @@ -48,13 +48,14 @@ CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_TIME_INTERPOLATION=y +CONFIG_EFI=y # CONFIG_ITANIUM is not set CONFIG_MCKINLEY=y # CONFIG_IA64_GENERIC is not set # CONFIG_IA64_DIG is not set -# CONFIG_IA64_HP_SIM is not set CONFIG_IA64_HP_ZX1=y # CONFIG_IA64_SGI_SN2 is not set +# CONFIG_IA64_HP_SIM is not set # CONFIG_IA64_PAGE_SIZE_4KB is not set # CONFIG_IA64_PAGE_SIZE_8KB is not set CONFIG_IA64_PAGE_SIZE_16KB=y @@ -80,15 +81,14 @@ CONFIG_HUGETLB_PAGE_SIZE_64MB=y # CONFIG_HUGETLB_PAGE_SIZE_256KB is not set # CONFIG_IA64_PAL_IDLE is not set CONFIG_SMP=y +CONFIG_NR_CPUS=16 # CONFIG_PREEMPT is not set +CONFIG_HAVE_DEC_LOCK=y CONFIG_IA32_SUPPORT=y CONFIG_COMPAT=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_PERFMON=y CONFIG_IA64_PALINFO=y -CONFIG_EFI=y CONFIG_EFI_VARS=y -CONFIG_NR_CPUS=16 CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=y @@ -140,7 +140,6 @@ CONFIG_HOTPLUG=y # # Plug and Play support # -# CONFIG_PNP is not set # # Block devices @@ -179,6 +178,7 @@ CONFIG_IDE_TASKFILE_IO=y # # IDE chipset support/bugfixes # +# CONFIG_IDE_GENERIC is not set CONFIG_BLK_DEV_IDEPCI=y CONFIG_IDEPCI_SHARE_IRQ=y # CONFIG_BLK_DEV_OFFBOARD is not set @@ -223,7 +223,6 @@ CONFIG_IDEDMA_AUTO=y # # I2O device support # -# CONFIG_I2O is not set # # Multi-device support (RAID and LVM) @@ -234,6 +233,7 @@ CONFIG_MD_LINEAR=m CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m CONFIG_MD_RAID5=m +# CONFIG_MD_RAID6 is not set CONFIG_MD_MULTIPATH=m CONFIG_BLK_DEV_DM=m CONFIG_DM_IOCTL_V4=y @@ -303,9 +303,15 @@ CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 # CONFIG_SCSI_QLOGIC_ISP is not set # CONFIG_SCSI_QLOGIC_FC is not set CONFIG_SCSI_QLOGIC_1280=y +CONFIG_SCSI_QLA2XXX=y +# CONFIG_SCSI_QLA21XX is not set +# CONFIG_SCSI_QLA22XX is not set +# CONFIG_SCSI_QLA2300 is not set +# CONFIG_SCSI_QLA2322 is not set +# CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_QLA6322 is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_NSP32 is not set # CONFIG_SCSI_DEBUG is not set # @@ -414,6 +420,7 @@ CONFIG_NET_PCI=y # CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set # CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set # CONFIG_DGRS is not set CONFIG_EEPRO100=y # CONFIG_EEPRO100_PIO is not set @@ -539,8 +546,8 @@ CONFIG_HW_CONSOLE=y # CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_ACPI=y CONFIG_SERIAL_8250_HCDP=y +CONFIG_SERIAL_8250_ACPI=y CONFIG_SERIAL_8250_NR_UARTS=4 # CONFIG_SERIAL_8250_EXTENDED is not set @@ -553,24 +560,6 @@ CONFIG_UNIX98_PTYS=y CONFIG_UNIX98_PTY_COUNT=256 # -# I2C support -# -# CONFIG_I2C is not set - -# -# I2C Algorithms -# - -# -# I2C Hardware Bus support -# - -# -# I2C Hardware Sensors Chip support -# -# CONFIG_I2C_SENSOR is not set - -# # Mice # # CONFIG_BUSMOUSE is not set @@ -610,6 +599,11 @@ CONFIG_DRM_RADEON=m # CONFIG_RAW_DRIVER is not set # +# I2C support +# +# CONFIG_I2C is not set + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -789,6 +783,7 @@ CONFIG_FB_RADEON=y # CONFIG_FB_ATY is not set # CONFIG_FB_SIS is not set # CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_TRIDENT is not set @@ -844,6 +839,7 @@ CONFIG_SND_SEQUENCER=m # # CONFIG_SND_ALI5451 is not set # CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set # CONFIG_SND_CS46XX is not set # CONFIG_SND_CS4281 is not set # CONFIG_SND_EMU10K1 is not set @@ -927,7 +923,6 @@ CONFIG_USB_HIDDEV=y # USB Imaging devices # # CONFIG_USB_MDC800 is not set -# CONFIG_USB_SCANNER is not set # CONFIG_USB_MICROTEK is not set # CONFIG_USB_HPUSBSCSI is not set @@ -961,12 +956,19 @@ CONFIG_USB_HIDDEV=y # # USB Miscellaneous drivers # +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set # CONFIG_USB_TIGL is not set # CONFIG_USB_AUERSWALD is not set # CONFIG_USB_RIO500 is not set # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_BRLVGER is not set # CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set + +# +# USB Gadget Support +# # CONFIG_USB_GADGET is not set # @@ -988,10 +990,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_IA64_PRINT_HAZARDS=y # CONFIG_DISABLE_VHPT is not set CONFIG_MAGIC_SYSRQ=y -CONFIG_IA64_EARLY_PRINTK=y -CONFIG_IA64_EARLY_PRINTK_UART=y -CONFIG_IA64_EARLY_PRINTK_UART_BASE=0xff5e0000 -CONFIG_IA64_EARLY_PRINTK_VGA=y # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set --- linux-2.6.3-rc2/arch/ia64/hp/common/sba_iommu.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/hp/common/sba_iommu.c 2004-02-12 00:39:25.000000000 -0800 @@ -57,11 +57,21 @@ ** There's potentially a conflict in the bio merge code with us ** advertising an iommu, but then bypassing it. Since I/O MMU bypassing ** appears to give more performance than bio-level virtual merging, we'll -** do the former for now. +** do the former for now. NOTE: BYPASS_SG also needs to be undef'd to +** completely restrict DMA to the IOMMU. */ #define ALLOW_IOV_BYPASS /* +** This option specifically allows/disallows bypassing scatterlists with +** multiple entries. Coalescing these entries can allow better DMA streaming +** and in some cases shows better performance than entirely bypassing the +** IOMMU. Performance increase on the order of 1-2% sequential output/input +** using bonnie++ on a RAID0 MD device (sym2 & mpt). +*/ +#undef ALLOW_IOV_BYPASS_SG + +/* ** If a device prefetches beyond the end of a valid pdir entry, it will cause ** a hard failure, ie. MCA. Version 3.0 and later of the zx1 LBA should ** disconnect on 4k boundaries and prevent such issues. If the device is @@ -75,7 +85,10 @@ #define ENABLE_MARK_CLEAN /* -** The number of debug flags is a clue - this code is fragile. +** The number of debug flags is a clue - this code is fragile. NOTE: since +** tightening the use of res_lock the resource bitmap and actual pdir are no +** longer guaranteed to stay in sync. The sanity checking code isn't going to +** like that. */ #undef DEBUG_SBA_INIT #undef DEBUG_SBA_RUN @@ -140,9 +153,7 @@ ** allocated and free'd/purged at a time might make this ** less interesting). */ -#define DELAYED_RESOURCE_CNT 16 - -#define DEFAULT_DMA_HINT_REG 0 +#define DELAYED_RESOURCE_CNT 64 #define ZX1_IOC_ID ((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP) #define REO_IOC_ID ((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP) @@ -187,14 +198,15 @@ struct ioc { unsigned long imask; /* pdir IOV Space mask */ unsigned long *res_hint; /* next avail IOVP - circular search */ - spinlock_t res_lock; - unsigned long hint_mask_pdir; /* bits used for DMA hints */ + unsigned long dma_mask; + spinlock_t res_lock; /* protects the resource bitmap, but must be held when */ + /* clearing pdir to prevent races with allocations. */ unsigned int res_bitshift; /* from the RIGHT! */ unsigned int res_size; /* size of resource map in bytes */ - unsigned int hint_shift_pdir; - unsigned long dma_mask; #if DELAYED_RESOURCE_CNT > 0 - int saved_cnt; + spinlock_t saved_lock; /* may want to try to get this on a separate cacheline */ + /* than res_lock for bigger systems. */ + int saved_cnt; struct sba_dma_pair { dma_addr_t iova; size_t size; @@ -221,6 +233,9 @@ struct ioc { static struct ioc *ioc_list; static int reserve_sba_gart = 1; +static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t); +static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t); + #define sba_sg_address(sg) (page_address((sg)->page) + (sg)->offset) #ifdef FULL_VALID_PDIR @@ -405,7 +420,7 @@ sba_check_sg( struct ioc *ioc, struct sc #define PAGES_PER_RANGE 1 /* could increase this to 4 or 8 if needed */ /* Convert from IOVP to IOVA and vice versa. */ -#define SBA_IOVA(ioc,iovp,offset,hint_reg) ((ioc->ibase) | (iovp) | (offset)) +#define SBA_IOVA(ioc,iovp,offset) ((ioc->ibase) | (iovp) | (offset)) #define SBA_IOVP(ioc,iova) ((iova) & ~(ioc->ibase)) #define PDIR_ENTRY_SIZE sizeof(u64) @@ -453,20 +468,25 @@ sba_search_bitmap(struct ioc *ioc, unsig ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0); ASSERT(res_ptr < res_end); - if (bits_wanted > (BITS_PER_LONG/2)) { - /* Search word at a time - no mask needed */ - for(; res_ptr < res_end; ++res_ptr) { - if (*res_ptr == 0) { - *res_ptr = RESMAP_MASK(bits_wanted); + + if (likely(bits_wanted == 1)) { + unsigned int bitshiftcnt; + for(; res_ptr < res_end ; res_ptr++) { + if (likely(*res_ptr != ~0UL)) { + bitshiftcnt = ffz(*res_ptr); + *res_ptr |= (1UL << bitshiftcnt); pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); pide <<= 3; /* convert to bit address */ - break; + pide += bitshiftcnt; + ioc->res_bitshift = bitshiftcnt + bits_wanted; + goto found_it; } } - /* point to the next word on next pass */ - res_ptr++; - ioc->res_bitshift = 0; - } else { + goto not_found; + + } + + if (likely(bits_wanted <= BITS_PER_LONG/2)) { /* ** Search the resource bit map on well-aligned values. ** "o" is the alignment. @@ -475,45 +495,72 @@ sba_search_bitmap(struct ioc *ioc, unsig */ unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift); uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o); - unsigned long mask; + unsigned long mask, base_mask; - if (bitshiftcnt >= BITS_PER_LONG) { - bitshiftcnt = 0; - res_ptr++; - } - mask = RESMAP_MASK(bits_wanted) << bitshiftcnt; + base_mask = RESMAP_MASK(bits_wanted); + mask = base_mask << bitshiftcnt; DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr); - while(res_ptr < res_end) + for(; res_ptr < res_end ; res_ptr++) { DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr); ASSERT(0 != mask); - if(0 == ((*res_ptr) & mask)) { - *res_ptr |= mask; /* mark resources busy! */ - pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); - pide <<= 3; /* convert to bit address */ - pide += bitshiftcnt; - break; - } - mask <<= o; - bitshiftcnt += o; - if (0 == mask) { - mask = RESMAP_MASK(bits_wanted); - bitshiftcnt=0; - res_ptr++; + for (; mask ; mask <<= o, bitshiftcnt += o) { + if(0 == ((*res_ptr) & mask)) { + *res_ptr |= mask; /* mark resources busy! */ + pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); + pide <<= 3; /* convert to bit address */ + pide += bitshiftcnt; + ioc->res_bitshift = bitshiftcnt + bits_wanted; + goto found_it; + } } + + bitshiftcnt = 0; + mask = base_mask; + } - /* look in the same word on the next pass */ - ioc->res_bitshift = bitshiftcnt + bits_wanted; - } - /* wrapped ? */ - if (res_end <= res_ptr) { - ioc->res_hint = (unsigned long *) ioc->res_map; - ioc->res_bitshift = 0; } else { - ioc->res_hint = res_ptr; + int qwords, bits, i; + unsigned long *end; + + qwords = bits_wanted >> 6; /* /64 */ + bits = bits_wanted - (qwords * BITS_PER_LONG); + + end = res_end - qwords; + + for (; res_ptr < end; res_ptr++) { + for (i = 0 ; i < qwords ; i++) { + if (res_ptr[i] != 0) + goto next_ptr; + } + if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits)) + continue; + + /* Found it, mark it */ + for (i = 0 ; i < qwords ; i++) + res_ptr[i] = ~0UL; + res_ptr[i] |= RESMAP_MASK(bits); + + pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); + pide <<= 3; /* convert to bit address */ + res_ptr += qwords; + ioc->res_bitshift = bits; + goto found_it; +next_ptr: + ; + } } + +not_found: + prefetch(ioc->res_map); + ioc->res_hint = (unsigned long *) ioc->res_map; + ioc->res_bitshift = 0; + return (pide); + +found_it: + ioc->res_hint = res_ptr; return (pide); } @@ -531,26 +578,67 @@ sba_alloc_range(struct ioc *ioc, size_t { unsigned int pages_needed = size >> iovp_shift; #ifdef PDIR_SEARCH_TIMING - unsigned long itc_start = ia64_get_itc(); + unsigned long itc_start; #endif unsigned long pide; + unsigned long flags; ASSERT(pages_needed); - ASSERT(pages_needed <= BITS_PER_LONG); ASSERT(0 == (size & ~iovp_mask)); + spin_lock_irqsave(&ioc->res_lock, flags); + +#ifdef PDIR_SEARCH_TIMING + itc_start = ia64_get_itc(); +#endif /* ** "seek and ye shall find"...praying never hurts either... */ - pide = sba_search_bitmap(ioc, pages_needed); - if (pide >= (ioc->res_size << 3)) { + if (unlikely(pide >= (ioc->res_size << 3))) { pide = sba_search_bitmap(ioc, pages_needed); - if (pide >= (ioc->res_size << 3)) + if (unlikely(pide >= (ioc->res_size << 3))) { +#if DELAYED_RESOURCE_CNT > 0 + /* + ** With delayed resource freeing, we can give this one more shot. We're + ** getting close to being in trouble here, so do what we can to make this + ** one count. + */ + spin_lock(&ioc->saved_lock); + if (ioc->saved_cnt > 0) { + struct sba_dma_pair *d; + int cnt = ioc->saved_cnt; + + d = &(ioc->saved[ioc->saved_cnt]); + + while (cnt--) { + sba_mark_invalid(ioc, d->iova, d->size); + sba_free_range(ioc, d->iova, d->size); + d--; + } + ioc->saved_cnt = 0; + READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ + } + spin_unlock(&ioc->saved_lock); + + pide = sba_search_bitmap(ioc, pages_needed); + if (unlikely(pide >= (ioc->res_size << 3))) + panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n", + ioc->ioc_hpa); +#else panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n", ioc->ioc_hpa); +#endif + } } +#ifdef PDIR_SEARCH_TIMING + ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed; + ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1; +#endif + + prefetchw(&(ioc->pdir_base[pide])); + #ifdef ASSERT_PDIR_SANITY /* verify the first enable bit is clear */ if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) { @@ -563,10 +651,7 @@ sba_alloc_range(struct ioc *ioc, size_t (uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map), ioc->res_bitshift ); -#ifdef PDIR_SEARCH_TIMING - ioc->avg_search[ioc->avg_idx++] = ia64_get_itc() - itc_start; - ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1; -#endif + spin_unlock_irqrestore(&ioc->res_lock, flags); return (pide); } @@ -587,22 +672,33 @@ sba_free_range(struct ioc *ioc, dma_addr unsigned int pide = PDIR_INDEX(iovp); unsigned int ridx = pide >> 3; /* convert bit to byte address */ unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]); - int bits_not_wanted = size >> iovp_shift; + unsigned long m; - /* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */ - unsigned long m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1)); + for (; bits_not_wanted > 0 ; res_ptr++) { + + if (unlikely(bits_not_wanted > BITS_PER_LONG)) { + + /* these mappings start 64bit aligned */ + *res_ptr = 0UL; + bits_not_wanted -= BITS_PER_LONG; + pide += BITS_PER_LONG; - DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", - __FUNCTION__, (uint) iova, size, - bits_not_wanted, m, pide, res_ptr, *res_ptr); - - ASSERT(m != 0); - ASSERT(bits_not_wanted); - ASSERT((bits_not_wanted * iovp_size) <= DMA_CHUNK_SIZE); - ASSERT(bits_not_wanted <= BITS_PER_LONG); - ASSERT((*res_ptr & m) == m); /* verify same bits are set */ - *res_ptr &= ~m; + } else { + + /* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */ + m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1)); + bits_not_wanted = 0; + + DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __FUNCTION__, (uint) iova, size, + bits_not_wanted, m, pide, res_ptr, *res_ptr); + + ASSERT(m != 0); + ASSERT(bits_not_wanted); + ASSERT((*res_ptr & m) == m); /* verify same bits are set */ + *res_ptr &= ~m; + } + } } @@ -612,9 +708,6 @@ sba_free_range(struct ioc *ioc, dma_addr * ***************************************************************/ -#define SBA_DMA_HINT(ioc, val) ((val) << (ioc)->hint_shift_pdir) - - /** * sba_io_pdir_entry - fill in one IO PDIR entry * @pdir_ptr: pointer to IO PDIR entry @@ -764,32 +857,36 @@ dma_addr_t sba_map_single(struct device *dev, void *addr, size_t size, int dir) { struct ioc *ioc; - unsigned long flags; dma_addr_t iovp; dma_addr_t offset; u64 *pdir_start; int pide; +#ifdef ASSERT_PDIR_SANITY + unsigned long flags; +#endif #ifdef ALLOW_IOV_BYPASS unsigned long pci_addr = virt_to_phys(addr); #endif - ioc = GET_IOC(dev); - ASSERT(ioc); - #ifdef ALLOW_IOV_BYPASS + ASSERT(to_pci_dev(dev)->dma_mask); /* ** Check if the PCI device can DMA to ptr... if so, just return ptr */ - if (dev && dev->dma_mask && (pci_addr & ~*dev->dma_mask) == 0) { + if (likely((pci_addr & ~to_pci_dev(dev)->dma_mask) == 0)) { /* ** Device is bit capable of DMA'ing to the buffer... ** just return the PCI address of ptr */ DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n", - *dev->dma_mask, pci_addr); + to_pci_dev(dev)->dma_mask, pci_addr); return pci_addr; } #endif + ioc = GET_IOC(dev); + ASSERT(ioc); + + prefetch(ioc->res_hint); ASSERT(size > 0); ASSERT(size <= DMA_CHUNK_SIZE); @@ -800,13 +897,15 @@ sba_map_single(struct device *dev, void /* round up to nearest iovp_size */ size = (size + offset + ~iovp_mask) & iovp_mask; - spin_lock_irqsave(&ioc->res_lock, flags); #ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); if (sba_check_pdir(ioc,"Check before sba_map_single()")) panic("Sanity check failed"); + spin_unlock_irqrestore(&ioc->res_lock, flags); #endif pide = sba_alloc_range(ioc, size); + iovp = (dma_addr_t) pide << iovp_shift; DBG_RUN("%s() 0x%p -> 0x%lx\n", @@ -829,10 +928,11 @@ sba_map_single(struct device *dev, void /* form complete address */ #ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); sba_check_pdir(ioc,"Check after sba_map_single()"); -#endif spin_unlock_irqrestore(&ioc->res_lock, flags); - return SBA_IOVA(ioc, iovp, offset, DEFAULT_DMA_HINT_REG); +#endif + return SBA_IOVA(ioc, iovp, offset); } /** @@ -857,7 +957,7 @@ void sba_unmap_single(struct device *dev ASSERT(ioc); #ifdef ALLOW_IOV_BYPASS - if ((iova & ioc->imask) != ioc->ibase) { + if (likely((iova & ioc->imask) != ioc->ibase)) { /* ** Address does not fall w/in IOVA, must be bypassing */ @@ -880,14 +980,15 @@ void sba_unmap_single(struct device *dev size += offset; size = ROUNDUP(size, iovp_size); - spin_lock_irqsave(&ioc->res_lock, flags); #if DELAYED_RESOURCE_CNT > 0 + spin_lock_irqsave(&ioc->saved_lock, flags); d = &(ioc->saved[ioc->saved_cnt]); d->iova = iova; d->size = size; - if (++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT) { + if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) { int cnt = ioc->saved_cnt; + spin_lock(&ioc->res_lock); while (cnt--) { sba_mark_invalid(ioc, d->iova, d->size); sba_free_range(ioc, d->iova, d->size); @@ -895,11 +996,15 @@ void sba_unmap_single(struct device *dev } ioc->saved_cnt = 0; READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ + spin_unlock(&ioc->res_lock); } + spin_unlock_irqrestore(&ioc->saved_lock, flags); #else /* DELAYED_RESOURCE_CNT == 0 */ + spin_lock_irqsave(&ioc->res_lock, flags); sba_mark_invalid(ioc, iova, size); sba_free_range(ioc, iova, size); READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ + spin_unlock_irqrestore(&ioc->res_lock, flags); #endif /* DELAYED_RESOURCE_CNT == 0 */ #ifdef ENABLE_MARK_CLEAN if (dir == DMA_FROM_DEVICE) { @@ -925,16 +1030,6 @@ void sba_unmap_single(struct device *dev } } #endif - spin_unlock_irqrestore(&ioc->res_lock, flags); - - /* XXX REVISIT for 2.5 Linux - need syncdma for zero-copy support. - ** For Astro based systems this isn't a big deal WRT performance. - ** As long as 2.4 kernels copyin/copyout data from/to userspace, - ** we don't need the syncdma. The issue here is I/O MMU cachelines - ** are *not* coherent in all cases. May be hwrev dependent. - ** Need to investigate more. - asm volatile("syncdma"); - */ } @@ -953,18 +1048,33 @@ sba_alloc_coherent (struct device *dev, void *addr; addr = (void *) __get_free_pages(flags, get_order(size)); - if (!addr) + if (unlikely(!addr)) return NULL; + memset(addr, 0, size); + *dma_handle = virt_to_phys(addr); + +#ifdef ALLOW_IOV_BYPASS + ASSERT(to_pci_dev(dev)->consistent_dma_mask); /* - * REVISIT: if sba_map_single starts needing more than dma_mask from the - * device, this needs to be updated. + ** Check if the PCI device can DMA to ptr... if so, just return ptr + */ + if (likely((*dma_handle & ~to_pci_dev(dev)->consistent_dma_mask) == 0)) { + DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n", + to_pci_dev(dev)->consistent_dma_mask, *dma_handle); + + return addr; + } +#endif + + /* + * If device can't bypass or bypass is disabled, pass the 32bit fake + * device to map single to get an iova mapping. */ ioc = GET_IOC(dev); ASSERT(ioc); *dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0); - memset(addr, 0, size); return addr; } @@ -1232,8 +1342,10 @@ int sba_map_sg(struct device *dev, struc { struct ioc *ioc; int coalesced, filled = 0; +#ifdef ASSERT_PDIR_SANITY unsigned long flags; -#ifdef ALLOW_IOV_BYPASS +#endif +#ifdef ALLOW_IOV_BYPASS_SG struct scatterlist *sg; #endif @@ -1241,8 +1353,9 @@ int sba_map_sg(struct device *dev, struc ioc = GET_IOC(dev); ASSERT(ioc); -#ifdef ALLOW_IOV_BYPASS - if (dev && dev->dma_mask && (ioc->dma_mask & ~*dev->dma_mask) == 0) { +#ifdef ALLOW_IOV_BYPASS_SG + ASSERT(to_pci_dev(dev)->dma_mask); + if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) { for (sg = sglist ; filled < nents ; filled++, sg++){ sg->dma_length = sg->length; sg->dma_address = virt_to_phys(sba_sg_address(sg)); @@ -1253,21 +1366,22 @@ int sba_map_sg(struct device *dev, struc /* Fast path single entry scatterlists. */ if (nents == 1) { sglist->dma_length = sglist->length; - sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, - dir); + sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir); return 1; } - spin_lock_irqsave(&ioc->res_lock, flags); - #ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); if (sba_check_pdir(ioc,"Check before sba_map_sg()")) { sba_dump_sg(ioc, sglist, nents); panic("Check before sba_map_sg()"); } + spin_unlock_irqrestore(&ioc->res_lock, flags); #endif + prefetch(ioc->res_hint); + /* ** First coalesce the chunks and allocate I/O pdir space ** @@ -1289,14 +1403,14 @@ int sba_map_sg(struct device *dev, struc filled = sba_fill_pdir(ioc, sglist, nents); #ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); if (sba_check_pdir(ioc,"Check after sba_map_sg()")) { sba_dump_sg(ioc, sglist, nents); panic("Check after sba_map_sg()\n"); } -#endif - spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif ASSERT(coalesced == filled); DBG_RUN_SG("%s() DONE %d mappings\n", __FUNCTION__, filled); @@ -1316,18 +1430,18 @@ int sba_map_sg(struct device *dev, struc */ void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir) { - struct ioc *ioc; #ifdef ASSERT_PDIR_SANITY + struct ioc *ioc; unsigned long flags; #endif DBG_RUN_SG("%s() START %d entries, %p,%x\n", __FUNCTION__, nents, sba_sg_address(sglist), sglist->length); +#ifdef ASSERT_PDIR_SANITY ioc = GET_IOC(dev); ASSERT(ioc); -#ifdef ASSERT_PDIR_SANITY spin_lock_irqsave(&ioc->res_lock, flags); sba_check_pdir(ioc,"Check before sba_unmap_sg()"); spin_unlock_irqrestore(&ioc->res_lock, flags); @@ -1478,6 +1592,9 @@ static void __init ioc_resource_init(struct ioc *ioc) { spin_lock_init(&ioc->res_lock); +#if DELAYED_RESOURCE_CNT > 0 + spin_lock_init(&ioc->saved_lock); +#endif /* resource map size dictated by pdir_size */ ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */ @@ -1689,13 +1806,13 @@ ioc_show(struct seq_file *s, void *v) seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n", ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF)); - seq_printf(s, "IOVA size : %d MB\n", ioc->iov_size/(1024*1024)); + seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024)); seq_printf(s, "IOVA page size : %ld kb\n", iovp_size/1024); for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr) used += hweight64(*res_ptr); - seq_printf(s, "PDIR size : %d entries\n", ioc->res_size << 3); + seq_printf(s, "PDIR size : %d entries\n", ioc->pdir_size >> 3); seq_printf(s, "PDIR used : %d entries\n", used); #ifdef PDIR_SEARCH_TIMING @@ -1708,7 +1825,7 @@ ioc_show(struct seq_file *s, void *v) if (ioc->avg_search[i] < min) min = ioc->avg_search[i]; } avg /= SBA_SEARCH_SAMPLE; - seq_printf(s, "Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n", + seq_printf(s, "Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n", min, avg, max); } #endif --- linux-2.6.3-rc2/arch/ia64/hp/sim/simeth.c 2003-07-27 12:14:38.000000000 -0700 +++ 25/arch/ia64/hp/sim/simeth.c 2004-02-12 00:39:30.000000000 -0800 @@ -224,7 +224,7 @@ simeth_probe1(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.3-rc2/arch/ia64/ia32/ia32_ioctl.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/ia64/ia32/ia32_ioctl.c 2004-02-12 00:40:52.000000000 -0800 @@ -8,6 +8,7 @@ */ #include /* argh, msdos_fs.h isn't self-contained... */ +#include #include "ia32priv.h" #define INCLUDES @@ -26,8 +27,6 @@ _ret; \ }) -asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); - #define CODE #include "compat_ioctl.c" --- linux-2.6.3-rc2/arch/ia64/ia32/ia32_signal.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/ia64/ia32/ia32_signal.c 2004-02-12 00:40:54.000000000 -0800 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -550,9 +551,6 @@ sys32_rt_sigaction (int sig, struct siga } -extern asmlinkage long sys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, - size_t sigsetsize); - asmlinkage long sys32_rt_sigprocmask (int how, compat_sigset_t *set, compat_sigset_t *oset, unsigned int sigsetsize) { @@ -584,8 +582,6 @@ asmlinkage long sys32_rt_sigtimedwait (compat_sigset_t *uthese, siginfo_t32 *uinfo, struct compat_timespec *uts, unsigned int sigsetsize) { - extern asmlinkage long sys_rt_sigtimedwait (const sigset_t *, siginfo_t *, - const struct timespec *, size_t); extern int copy_siginfo_to_user32 (siginfo_t32 *, siginfo_t *); mm_segment_t old_fs = get_fs(); struct timespec t; @@ -611,7 +607,6 @@ sys32_rt_sigtimedwait (compat_sigset_t * asmlinkage long sys32_rt_sigqueueinfo (int pid, int sig, siginfo_t32 *uinfo) { - extern asmlinkage long sys_rt_sigqueueinfo (int, int, siginfo_t *); extern int copy_siginfo_from_user32 (siginfo_t *to, siginfo_t32 *from); mm_segment_t old_fs = get_fs(); siginfo_t info; --- linux-2.6.3-rc2/arch/ia64/ia32/sys_ia32.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/ia32/sys_ia32.c 2004-02-12 00:40:57.000000000 -0800 @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -55,6 +56,7 @@ #include #include #include +#include #include "ia32priv.h" @@ -81,16 +83,9 @@ #define high2lowuid(uid) ((uid) > 65535 ? 65534 : (uid)) #define high2lowgid(gid) ((gid) > 65535 ? 65534 : (gid)) -extern asmlinkage long sys_execve (char *, char **, char **, struct pt_regs *); -extern asmlinkage long sys_mprotect (unsigned long, size_t, unsigned long); -extern asmlinkage long sys_munmap (unsigned long, size_t); extern unsigned long arch_get_unmapped_area (struct file *, unsigned long, unsigned long, unsigned long, unsigned long); -/* forward declaration: */ -asmlinkage long sys32_mprotect (unsigned int, unsigned int, int); -asmlinkage unsigned long sys_brk(unsigned long); - /* * Anything that modifies or inspects ia32 user virtual memory must hold this semaphore * while doing so. @@ -949,9 +944,6 @@ sys32_old_select (struct sel_arg_struct (struct compat_timeval *) A(a.tvp)); } -asmlinkage ssize_t sys_readv (unsigned long,const struct iovec *,unsigned long); -asmlinkage ssize_t sys_writev (unsigned long,const struct iovec *,unsigned long); - static struct iovec * get_compat_iovec (struct compat_iovec *iov32, struct iovec *iov_buf, u32 count, int type) { @@ -2023,9 +2015,6 @@ restore_ia32_fpxstate (struct task_struc return 0; } -extern asmlinkage long sys_ptrace (long, pid_t, unsigned long, unsigned long, long, long, long, - long, long); - /* * Note that the IA32 version of `ptrace' calls the IA64 routine for * many of the requests. This will only work for requests that do @@ -2307,8 +2296,6 @@ struct sysctl32 { unsigned int __unused[4]; }; -extern asmlinkage long sys_sysctl(struct __sysctl_args *args); - asmlinkage long sys32_sysctl (struct sysctl32 *args) { @@ -2358,7 +2345,6 @@ sys32_sysctl (struct sysctl32 *args) asmlinkage long sys32_newuname (struct new_utsname *name) { - extern asmlinkage long sys_newuname(struct new_utsname * name); int ret = sys_newuname(name); if (!ret) @@ -2367,8 +2353,6 @@ sys32_newuname (struct new_utsname *name return ret; } -extern asmlinkage long sys_getresuid (uid_t *ruid, uid_t *euid, uid_t *suid); - asmlinkage long sys32_getresuid16 (u16 *ruid, u16 *euid, u16 *suid) { @@ -2385,8 +2369,6 @@ sys32_getresuid16 (u16 *ruid, u16 *euid, return ret; } -extern asmlinkage long sys_getresgid (gid_t *rgid, gid_t *egid, gid_t *sgid); - asmlinkage long sys32_getresgid16 (u16 *rgid, u16 *egid, u16 *sgid) { @@ -2407,65 +2389,100 @@ sys32_getresgid16 (u16 *rgid, u16 *egid, asmlinkage long sys32_lseek (unsigned int fd, int offset, unsigned int whence) { - extern off_t sys_lseek (unsigned int fd, off_t offset, unsigned int origin); - /* Sign-extension of "offset" is important here... */ return sys_lseek(fd, offset, whence); } -extern asmlinkage long sys_getgroups (int gidsetsize, gid_t *grouplist); +static int +groups16_to_user(short *grouplist, struct group_info *group_info) +{ + int i; + short group; + + for (i = 0; i < group_info->ngroups; i++) { + group = (short)GROUP_AT(group_info, i); + if (put_user(group, grouplist+i)) + return -EFAULT; + } + + return 0; +} + +static int +groups16_from_user(struct group_info *group_info, short *grouplist) +{ + int i; + short group; + + for (i = 0; i < group_info->ngroups; i++) { + if (get_user(group, grouplist+i)) + return -EFAULT; + GROUP_AT(group_info, i) = (gid_t)group; + } + + return 0; +} asmlinkage long sys32_getgroups16 (int gidsetsize, short *grouplist) { - mm_segment_t old_fs = get_fs(); - gid_t gl[NGROUPS]; - int ret, i; + int i; - set_fs(KERNEL_DS); - ret = sys_getgroups(gidsetsize, gl); - set_fs(old_fs); + if (gidsetsize < 0) + return -EINVAL; - if (gidsetsize && ret > 0 && ret <= NGROUPS) - for (i = 0; i < ret; i++, grouplist++) - if (put_user(gl[i], grouplist)) - return -EFAULT; - return ret; + get_group_info(current->group_info); + i = current->group_info->ngroups; + if (gidsetsize) { + if (i > gidsetsize) { + i = -EINVAL; + goto out; + } + if (groups16_to_user(grouplist, current->group_info)) { + i = -EFAULT; + goto out; + } + } +out: + put_group_info(current->group_info); + return i; } -extern asmlinkage long sys_setgroups (int gidsetsize, gid_t *grouplist); - asmlinkage long sys32_setgroups16 (int gidsetsize, short *grouplist) { - mm_segment_t old_fs = get_fs(); - gid_t gl[NGROUPS]; - int ret, i; + struct group_info *group_info; + int retval; - if ((unsigned) gidsetsize > NGROUPS) + if (!capable(CAP_SETGID)) + return -EPERM; + if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; - for (i = 0; i < gidsetsize; i++, grouplist++) - if (get_user(gl[i], grouplist)) - return -EFAULT; - set_fs(KERNEL_DS); - ret = sys_setgroups(gidsetsize, gl); - set_fs(old_fs); - return ret; + + group_info = groups_alloc(gidsetsize); + if (!group_info) + return -ENOMEM; + retval = groups16_from_user(group_info, grouplist); + if (retval) { + put_group_info(group_info); + return retval; + } + + retval = set_current_groups(group_info); + put_group_info(group_info); + + return retval; } asmlinkage long sys32_truncate64 (unsigned int path, unsigned int len_lo, unsigned int len_hi) { - extern asmlinkage long sys_truncate (const char *path, unsigned long length); - return sys_truncate((const char *) A(path), ((unsigned long) len_hi << 32) | len_lo); } asmlinkage long sys32_ftruncate64 (int fd, unsigned int len_lo, unsigned int len_hi) { - extern asmlinkage long sys_ftruncate (int fd, unsigned long length); - return sys_ftruncate(fd, ((unsigned long) len_hi << 32) | len_lo); } @@ -2554,7 +2571,6 @@ struct sysinfo32 { asmlinkage long sys32_sysinfo (struct sysinfo32 *info) { - extern asmlinkage long sys_sysinfo (struct sysinfo *); struct sysinfo s; long ret, err; int bitcount = 0; @@ -2606,7 +2622,6 @@ sys32_sysinfo (struct sysinfo32 *info) asmlinkage long sys32_sched_rr_get_interval (pid_t pid, struct compat_timespec *interval) { - extern asmlinkage long sys_sched_rr_get_interval (pid_t, struct timespec *); mm_segment_t old_fs = get_fs(); struct timespec t; long ret; @@ -2622,21 +2637,18 @@ sys32_sched_rr_get_interval (pid_t pid, asmlinkage long sys32_pread (unsigned int fd, void *buf, unsigned int count, u32 pos_lo, u32 pos_hi) { - extern asmlinkage long sys_pread64 (unsigned int, char *, size_t, loff_t); return sys_pread64(fd, buf, count, ((unsigned long) pos_hi << 32) | pos_lo); } asmlinkage long sys32_pwrite (unsigned int fd, void *buf, unsigned int count, u32 pos_lo, u32 pos_hi) { - extern asmlinkage long sys_pwrite64 (unsigned int, const char *, size_t, loff_t); return sys_pwrite64(fd, buf, count, ((unsigned long) pos_hi << 32) | pos_lo); } asmlinkage long sys32_sendfile (int out_fd, int in_fd, int *offset, unsigned int count) { - extern asmlinkage long sys_sendfile (int, int, off_t *, size_t); mm_segment_t old_fs = get_fs(); long ret; off_t of; @@ -2657,7 +2669,6 @@ sys32_sendfile (int out_fd, int in_fd, i asmlinkage long sys32_personality (unsigned int personality) { - extern asmlinkage long sys_personality (unsigned long); long ret; if (current->personality == PER_LINUX32 && personality == PER_LINUX) @@ -3049,9 +3060,6 @@ copy_mount_stuff_to_kernel(const void *u return 0; } -extern asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, - unsigned long new_flags, void *data); - #define SMBFS_NAME "smbfs" #define NCPFS_NAME "ncpfs" @@ -3116,8 +3124,6 @@ sys32_mount(char *dev_name, char *dir_na } } -extern asmlinkage long sys_setreuid(uid_t ruid, uid_t euid); - asmlinkage long sys32_setreuid(compat_uid_t ruid, compat_uid_t euid) { uid_t sruid, seuid; @@ -3127,8 +3133,6 @@ asmlinkage long sys32_setreuid(compat_ui return sys_setreuid(sruid, seuid); } -extern asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid); - asmlinkage long sys32_setresuid(compat_uid_t ruid, compat_uid_t euid, compat_uid_t suid) @@ -3141,8 +3145,6 @@ sys32_setresuid(compat_uid_t ruid, compa return sys_setresuid(sruid, seuid, ssuid); } -extern asmlinkage long sys_setregid(gid_t rgid, gid_t egid); - asmlinkage long sys32_setregid(compat_gid_t rgid, compat_gid_t egid) { @@ -3153,8 +3155,6 @@ sys32_setregid(compat_gid_t rgid, compat return sys_setregid(srgid, segid); } -extern asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid); - asmlinkage long sys32_setresgid(compat_gid_t rgid, compat_gid_t egid, compat_gid_t sgid) @@ -3284,8 +3284,6 @@ nfs_getfh32_res_trans(union nfsctl_res * return err; } -extern asmlinkage long sys_nfsservctl(int cmd, void *arg, void *resp); - int asmlinkage sys32_nfsservctl(int cmd, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32) { --- linux-2.6.3-rc2/arch/ia64/Kconfig 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/Kconfig 2004-02-12 00:41:21.000000000 -0800 @@ -662,6 +662,13 @@ config DEBUG_INFO Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. +config LOCKMETER + bool "Kernel lock metering" + depends on SMP + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + endmenu source "security/Kconfig" --- linux-2.6.3-rc2/arch/ia64/kernel/efivars.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/ia64/kernel/efivars.c 2004-02-12 00:39:25.000000000 -0800 @@ -29,9 +29,12 @@ * * Changelog: * + * 10 Feb 2004 - Stephane Eranian + * Provide FPSWA version number via /proc/efi/fpswa + * * 10 Dec 2002 - Matt Domsch * fix locking per Peter Chubb's findings - * + * * 25 Mar 2002 - Matt Domsch * move uuid_unparse() to include/asm-ia64/efi.h:efi_guid_unparse() * @@ -70,6 +73,7 @@ #include #include +#include #include MODULE_AUTHOR("Matt Domsch "); @@ -407,6 +411,37 @@ static struct file_operations efi_systab .read = efi_systab_read, }; +static ssize_t +efi_fpswa_read (struct file *file, char *buffer, size_t count, loff_t *ppos) +{ + ssize_t size, length; + char str[32]; + void *data; + + snprintf(str, sizeof(str), "revision=%u.%u\n", + fpswa_interface->revision >> 16, fpswa_interface->revision & 0xffff); + + length = strlen(str); + + if (*ppos >= length) + return 0; + + data = str + file->f_pos; + size = length - file->f_pos; + if (size > count) + size = count; + if (copy_to_user(buffer, data, size)) + return -EFAULT; + + *ppos += size; + return size; +} + +static struct proc_dir_entry *efi_fpswa_entry; +static struct file_operations efi_fpswa_fops = { + .read = efi_fpswa_read, +}; + static int __init efivars_init(void) { @@ -429,6 +464,12 @@ efivars_init(void) if (efi_systab_entry) efi_systab_entry->proc_fops = &efi_systab_fops; + if (fpswa_interface) { + efi_fpswa_entry = create_proc_entry("fpswa", S_IRUGO, efi_dir); + if (efi_fpswa_entry) + efi_fpswa_entry->proc_fops = &efi_fpswa_fops; + } + efi_vars_dir = proc_mkdir("vars", efi_dir); /* Per EFI spec, the maximum storage allocated for both --- linux-2.6.3-rc2/arch/ia64/kernel/irq.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/kernel/irq.c 2004-02-12 00:41:09.000000000 -0800 @@ -942,7 +942,7 @@ void set_irq_affinity_info (unsigned int static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -1007,7 +1007,7 @@ static int irq_affinity_write_proc (stru static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3-rc2/arch/ia64/kernel/mca.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/ia64/kernel/mca.c 2004-02-12 00:39:25.000000000 -0800 @@ -18,7 +18,7 @@ * Copyright (C) 2000 Intel * Copyright (C) Chuck Fleckenstein (cfleck@co.intel.com) * - * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) 1999, 2004 Silicon Graphics, Inc. * Copyright (C) Vijay Chander(vijay@engr.sgi.com) * * 03/04/15 D. Mosberger Added INIT backtrace support. @@ -40,6 +40,14 @@ * 2003-12-08 Keith Owens * smp_call_function() must not be called from interrupt context (can * deadlock on tasklist_lock). Use keventd to call smp_call_function(). + * + * 2004-02-01 Keith Owens + * Avoid deadlock when using printk() for MCA and INIT records. + * Delete all record printing code, moved to salinfo_decode in user space. + * Mark variables and functions static where possible. + * Delete dead variables and functions. + * Reorder to remove the need for forward declarations and to consolidate + * related code. */ #include #include @@ -68,14 +76,18 @@ #include #include -#undef MCA_PRT_XTRA_DATA +#if defined(IA64_MCA_DEBUG_INFO) +# define IA64_MCA_DEBUG(fmt...) printk(fmt) +#else +# define IA64_MCA_DEBUG(fmt...) +#endif typedef struct ia64_fptr { unsigned long fp; unsigned long gp; } ia64_fptr_t; -ia64_mc_info_t ia64_mc_info; +/* Used by mca_asm.S */ ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state; ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state; u64 ia64_mca_proc_state_dump[512]; @@ -83,56 +95,17 @@ u64 ia64_mca_stack[1024] __attribute_ u64 ia64_mca_stackframe[32]; u64 ia64_mca_bspstore[1024]; u64 ia64_init_stack[KERNEL_STACK_SIZE/8] __attribute__((aligned(16))); -u64 ia64_os_mca_recovery_successful; u64 ia64_mca_serialize; -static void ia64_mca_wakeup_ipi_wait(void); -static void ia64_mca_wakeup(int cpu); -static void ia64_mca_wakeup_all(void); -static void ia64_log_init(int); + +/* In mca_asm.S */ extern void ia64_monarch_init_handler (void); extern void ia64_slave_init_handler (void); -static u64 ia64_log_get(int sal_info_type, u8 **buffer); -extern struct hw_interrupt_type irq_type_iosapic_level; - -struct ia64_mca_tlb_info ia64_mca_tlb_list[NR_CPUS]; - -static struct irqaction cmci_irqaction = { - .handler = ia64_mca_cmc_int_handler, - .flags = SA_INTERRUPT, - .name = "cmc_hndlr" -}; - -static struct irqaction cmcp_irqaction = { - .handler = ia64_mca_cmc_int_caller, - .flags = SA_INTERRUPT, - .name = "cmc_poll" -}; - -static struct irqaction mca_rdzv_irqaction = { - .handler = ia64_mca_rendez_int_handler, - .flags = SA_INTERRUPT, - .name = "mca_rdzv" -}; -static struct irqaction mca_wkup_irqaction = { - .handler = ia64_mca_wakeup_int_handler, - .flags = SA_INTERRUPT, - .name = "mca_wkup" -}; +static ia64_mc_info_t ia64_mc_info; -#ifdef CONFIG_ACPI -static struct irqaction mca_cpe_irqaction = { - .handler = ia64_mca_cpe_int_handler, - .flags = SA_INTERRUPT, - .name = "cpe_hndlr" -}; +extern struct hw_interrupt_type irq_type_iosapic_level; -static struct irqaction mca_cpep_irqaction = { - .handler = ia64_mca_cpe_int_caller, - .flags = SA_INTERRUPT, - .name = "cpe_poll" -}; -#endif /* CONFIG_ACPI */ +struct ia64_mca_tlb_info ia64_mca_tlb_list[NR_CPUS]; #define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */ #define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */ @@ -156,59 +129,152 @@ static int cmc_polling_enabled = 1; */ static int cpe_poll_enabled = 1; -extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size); +extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); + +/* + * IA64_MCA log support + */ +#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */ +#define IA64_MAX_LOG_TYPES 4 /* MCA, INIT, CMC, CPE */ + +typedef struct ia64_state_log_s +{ + spinlock_t isl_lock; + int isl_index; + unsigned long isl_count; + ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */ +} ia64_state_log_t; + +static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; + +#define IA64_LOG_ALLOCATE(it, size) \ + {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \ + (ia64_err_rec_t *)alloc_bootmem(size); \ + ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \ + (ia64_err_rec_t *)alloc_bootmem(size);} +#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock) +#define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s) +#define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s) +#define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index +#define IA64_LOG_CURR_INDEX(it) 1 - ia64_state_log[it].isl_index +#define IA64_LOG_INDEX_INC(it) \ + {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \ + ia64_state_log[it].isl_count++;} +#define IA64_LOG_INDEX_DEC(it) \ + ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index +#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])) +#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])) +#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count + +/* + * ia64_log_init + * Reset the OS ia64 log buffer + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) + * Outputs : None + */ +static void +ia64_log_init(int sal_info_type) +{ + u64 max_size = 0; + + IA64_LOG_NEXT_INDEX(sal_info_type) = 0; + IA64_LOG_LOCK_INIT(sal_info_type); + + // SAL will tell us the maximum size of any error record of this type + max_size = ia64_sal_get_state_info_size(sal_info_type); + if (!max_size) + /* alloc_bootmem() doesn't like zero-sized allocations! */ + return; + + // set up OS data structures to hold error info + IA64_LOG_ALLOCATE(sal_info_type, max_size); + memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size); + memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size); +} + +/* + * ia64_log_get + * + * Get the current MCA log from SAL and copy it into the OS log buffer. + * + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) + * irq_safe whether you can use printk at this point + * Outputs : size (total record length) + * *buffer (ptr to error record) + * + */ +static u64 +ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe) +{ + sal_log_record_header_t *log_buffer; + u64 total_len = 0; + int s; + + IA64_LOG_LOCK(sal_info_type); + + /* Get the process state information */ + log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type); + + total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer); + + if (total_len) { + IA64_LOG_INDEX_INC(sal_info_type); + IA64_LOG_UNLOCK(sal_info_type); + if (irq_safe) { + IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. " + "Record length = %ld\n", __FUNCTION__, sal_info_type, total_len); + } + *buffer = (u8 *) log_buffer; + return total_len; + } else { + IA64_LOG_UNLOCK(sal_info_type); + return 0; + } +} /* * ia64_mca_log_sal_error_record * - * This function retrieves a specified error record type from SAL, - * wakes up any processes waiting for error records, and sends it to - * the system log. + * This function retrieves a specified error record type from SAL + * and wakes up any processes waiting for error records. * * Inputs : sal_info_type (Type of error record MCA/CMC/CPE/INIT) - * Outputs : platform error status + * called_from_init (1 for boot processing) */ -int +static void ia64_mca_log_sal_error_record(int sal_info_type, int called_from_init) { u8 *buffer; u64 size; - int platform_err; + int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA && sal_info_type != SAL_INFO_TYPE_INIT; + static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" }; - size = ia64_log_get(sal_info_type, &buffer); + size = ia64_log_get(sal_info_type, &buffer, irq_safe); if (!size) - return 0; + return; - /* TODO: - * 1. analyze error logs to determine recoverability - * 2. perform error recovery procedures, if applicable - * 3. set ia64_os_mca_recovery_successful flag, if applicable - */ + salinfo_log_wakeup(sal_info_type, buffer, size, irq_safe); + + if (irq_safe || called_from_init) + printk(KERN_INFO "CPU %d: SAL log contains %s error record\n", + smp_processor_id(), + sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN"); - salinfo_log_wakeup(sal_info_type, buffer, size); - platform_err = ia64_log_print(sal_info_type, (prfunc_t)printk); /* Clear logs from corrected errors in case there's no user-level logger */ if (sal_info_type == SAL_INFO_TYPE_CPE || sal_info_type == SAL_INFO_TYPE_CMC) ia64_sal_clear_state_info(sal_info_type); - - return platform_err; } /* * platform dependent error handling */ #ifndef PLATFORM_MCA_HANDLERS -void -mca_handler_platform (void) -{ -} - -irqreturn_t +static irqreturn_t ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) { - IA64_MCA_DEBUG("ia64_mca_cpe_int_handler: received interrupt. CPU:%d vector = %#x\n", - smp_processor_id(), cpe_irq); + IA64_MCA_DEBUG("%s: received interrupt. CPU:%d vector = %#x\n", + __FUNCTION__, smp_processor_id(), cpe_irq); /* SAL spec states this should run w/ interrupts enabled */ local_irq_enable(); @@ -356,7 +422,7 @@ fetch_min_state (pal_min_state_area_t *m PUT_NAT_BIT(sw->caller_unat, &pt->r30); PUT_NAT_BIT(sw->caller_unat, &pt->r31); } -void +static void init_handler_platform (pal_min_state_area_t *ms, struct pt_regs *pt, struct switch_stack *sw) { @@ -404,23 +470,6 @@ init_handler_platform (pal_min_state_are } /* - * ia64_mca_init_platform - * - * External entry for platform specific MCA initialization. - * - * Inputs - * None - * - * Outputs - * None - */ -void -ia64_mca_init_platform (void) -{ - -} - -/* * ia64_mca_check_errors * * External entry to check for error records which may have been posted by SAL @@ -438,6 +487,7 @@ ia64_mca_check_errors (void) /* * If there is an MCA error record pending, get it and log it. */ + printk(KERN_INFO "CPU %d: checking for saved MCA error records\n", smp_processor_id()); ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA, 1); return 0; @@ -465,13 +515,13 @@ ia64_mca_register_cpev (int cpev) isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0); if (isrv.status) { - printk(KERN_ERR "ia64_mca_platform_init: failed to register Corrected " - "Platform Error interrupt vector with SAL.\n"); + printk(KERN_ERR "Failed to register Corrected Platform " + "Error interrupt vector with SAL (status %ld)\n", isrv.status); return; } - IA64_MCA_DEBUG("ia64_mca_platform_init: corrected platform error " - "vector %#x setup and enabled\n", cpev); + IA64_MCA_DEBUG("%s: corrected platform error " + "vector %#x setup and enabled\n", __FUNCTION__, cpev); } #endif /* CONFIG_ACPI */ @@ -499,12 +549,12 @@ ia64_mca_cmc_vector_setup (void) cmcv.cmcv_vector = IA64_CMC_VECTOR; ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); - IA64_MCA_DEBUG("ia64_mca_platform_init: CPU %d corrected " + IA64_MCA_DEBUG("%s: CPU %d corrected " "machine check vector %#x setup and enabled.\n", - smp_processor_id(), IA64_CMC_VECTOR); + __FUNCTION__, smp_processor_id(), IA64_CMC_VECTOR); - IA64_MCA_DEBUG("ia64_mca_platform_init: CPU %d CMCV = %#016lx\n", - smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV)); + IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n", + __FUNCTION__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV)); } /* @@ -519,7 +569,7 @@ ia64_mca_cmc_vector_setup (void) * Outputs * None */ -void +static void ia64_mca_cmc_vector_disable (void *dummy) { cmcv_reg_t cmcv; @@ -529,9 +579,9 @@ ia64_mca_cmc_vector_disable (void *dummy cmcv.cmcv_mask = 1; /* Mask/disable interrupt */ ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval) - IA64_MCA_DEBUG("ia64_mca_cmc_vector_disable: CPU %d corrected " + IA64_MCA_DEBUG("%s: CPU %d corrected " "machine check vector %#x disabled.\n", - smp_processor_id(), cmcv.cmcv_vector); + __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector); } /* @@ -546,7 +596,7 @@ ia64_mca_cmc_vector_disable (void *dummy * Outputs * None */ -void +static void ia64_mca_cmc_vector_enable (void *dummy) { cmcv_reg_t cmcv; @@ -556,63 +606,9 @@ ia64_mca_cmc_vector_enable (void *dummy) cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */ ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval) - IA64_MCA_DEBUG("ia64_mca_cmc_vector_enable: CPU %d corrected " + IA64_MCA_DEBUG("%s: CPU %d corrected " "machine check vector %#x enabled.\n", - smp_processor_id(), cmcv.cmcv_vector); -} - - -#if defined(MCA_TEST) - -sal_log_processor_info_t slpi_buf; - -void -mca_test(void) -{ - slpi_buf.valid.psi_static_struct = 1; - slpi_buf.valid.num_cache_check = 1; - slpi_buf.valid.num_tlb_check = 1; - slpi_buf.valid.num_bus_check = 1; - slpi_buf.valid.processor_static_info.minstate = 1; - slpi_buf.valid.processor_static_info.br = 1; - slpi_buf.valid.processor_static_info.cr = 1; - slpi_buf.valid.processor_static_info.ar = 1; - slpi_buf.valid.processor_static_info.rr = 1; - slpi_buf.valid.processor_static_info.fr = 1; - - ia64_os_mca_dispatch(); -} - -#endif /* #if defined(MCA_TEST) */ - - -/* - * verify_guid - * - * Compares a test guid to a target guid and returns result. - * - * Inputs - * test_guid * (ptr to guid to be verified) - * target_guid * (ptr to standard guid to be verified against) - * - * Outputs - * 0 (test verifies against target) - * non-zero (test guid does not verify) - */ -static int -verify_guid (efi_guid_t *test, efi_guid_t *target) -{ - int rc; -#ifdef IA64_MCA_DEBUG_INFO - char out[40]; -#endif - - if ((rc = efi_guidcmp(*test, *target))) { - IA64_MCA_DEBUG(KERN_DEBUG - "verify_guid: invalid GUID = %s\n", - efi_guid_unparse(test, out)); - } - return rc; + __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector); } /* @@ -640,239 +636,54 @@ ia64_mca_cmc_vector_enable_keventd(void } /* - * ia64_mca_init - * - * Do all the system level mca specific initialization. - * - * 1. Register spinloop and wakeup request interrupt vectors - * - * 2. Register OS_MCA handler entry point - * - * 3. Register OS_INIT handler entry point - * - * 4. Initialize MCA/CMC/INIT related log buffers maintained by the OS. + * ia64_mca_wakeup_ipi_wait * - * Note that this initialization is done very early before some kernel - * services are available. + * Wait for the inter-cpu interrupt to be sent by the + * monarch processor once it is done with handling the + * MCA. * * Inputs : None - * * Outputs : None */ -void __init -ia64_mca_init(void) +static void +ia64_mca_wakeup_ipi_wait(void) { - ia64_fptr_t *mon_init_ptr = (ia64_fptr_t *)ia64_monarch_init_handler; - ia64_fptr_t *slave_init_ptr = (ia64_fptr_t *)ia64_slave_init_handler; - ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch; - int i; - s64 rc; - struct ia64_sal_retval isrv; - u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ + int irr_num = (IA64_MCA_WAKEUP_VECTOR >> 6); + int irr_bit = (IA64_MCA_WAKEUP_VECTOR & 0x3f); + u64 irr = 0; - IA64_MCA_DEBUG("ia64_mca_init: begin\n"); + do { + switch(irr_num) { + case 0: + irr = ia64_getreg(_IA64_REG_CR_IRR0); + break; + case 1: + irr = ia64_getreg(_IA64_REG_CR_IRR1); + break; + case 2: + irr = ia64_getreg(_IA64_REG_CR_IRR2); + break; + case 3: + irr = ia64_getreg(_IA64_REG_CR_IRR3); + break; + } + } while (!(irr & (1UL << irr_bit))) ; +} - /* initialize recovery success indicator */ - ia64_os_mca_recovery_successful = 0; - - /* Clear the Rendez checkin flag for all cpus */ - for(i = 0 ; i < NR_CPUS; i++) - ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; - - /* - * Register the rendezvous spinloop and wakeup mechanism with SAL - */ - - /* Register the rendezvous interrupt vector with SAL */ - while (1) { - isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT, - SAL_MC_PARAM_MECHANISM_INT, - IA64_MCA_RENDEZ_VECTOR, - timeout, - SAL_MC_PARAM_RZ_ALWAYS); - rc = isrv.status; - if (rc == 0) - break; - if (rc == -2) { - printk(KERN_INFO "ia64_mca_init: increasing MCA rendezvous timeout from " - "%ld to %ld\n", timeout, isrv.v0); - timeout = isrv.v0; - continue; - } - printk(KERN_ERR "ia64_mca_init: Failed to register rendezvous interrupt " - "with SAL. rc = %ld\n", rc); - return; - } - - /* Register the wakeup interrupt vector with SAL */ - isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP, - SAL_MC_PARAM_MECHANISM_INT, - IA64_MCA_WAKEUP_VECTOR, - 0, 0); - rc = isrv.status; - if (rc) { - printk(KERN_ERR "ia64_mca_init: Failed to register wakeup interrupt with SAL. " - "rc = %ld\n", rc); - return; - } - - IA64_MCA_DEBUG("ia64_mca_init: registered mca rendezvous spinloop and wakeup mech.\n"); - - ia64_mc_info.imi_mca_handler = ia64_tpa(mca_hldlr_ptr->fp); - /* - * XXX - disable SAL checksum by setting size to 0; should be - * ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch); - */ - ia64_mc_info.imi_mca_handler_size = 0; - - /* Register the os mca handler with SAL */ - if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, - ia64_mc_info.imi_mca_handler, - ia64_tpa(mca_hldlr_ptr->gp), - ia64_mc_info.imi_mca_handler_size, - 0, 0, 0))) - { - printk(KERN_ERR "ia64_mca_init: Failed to register os mca handler with SAL. " - "rc = %ld\n", rc); - return; - } - - IA64_MCA_DEBUG("ia64_mca_init: registered os mca handler with SAL at 0x%lx, gp = 0x%lx\n", - ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp)); - - /* - * XXX - disable SAL checksum by setting size to 0, should be - * IA64_INIT_HANDLER_SIZE - */ - ia64_mc_info.imi_monarch_init_handler = ia64_tpa(mon_init_ptr->fp); - ia64_mc_info.imi_monarch_init_handler_size = 0; - ia64_mc_info.imi_slave_init_handler = ia64_tpa(slave_init_ptr->fp); - ia64_mc_info.imi_slave_init_handler_size = 0; - - IA64_MCA_DEBUG("ia64_mca_init: os init handler at %lx\n", - ia64_mc_info.imi_monarch_init_handler); - - /* Register the os init handler with SAL */ - if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, - ia64_mc_info.imi_monarch_init_handler, - ia64_tpa(ia64_getreg(_IA64_REG_GP)), - ia64_mc_info.imi_monarch_init_handler_size, - ia64_mc_info.imi_slave_init_handler, - ia64_tpa(ia64_getreg(_IA64_REG_GP)), - ia64_mc_info.imi_slave_init_handler_size))) - { - printk(KERN_ERR "ia64_mca_init: Failed to register m/s init handlers with SAL. " - "rc = %ld\n", rc); - return; - } - - IA64_MCA_DEBUG("ia64_mca_init: registered os init handler with SAL\n"); - - /* - * Configure the CMCI/P vector and handler. Interrupts for CMC are - * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c). - */ - register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction); - register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction); - ia64_mca_cmc_vector_setup(); /* Setup vector on BSP & enable */ - - /* Setup the MCA rendezvous interrupt vector */ - register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction); - - /* Setup the MCA wakeup interrupt vector */ - register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction); - -#ifdef CONFIG_ACPI - /* Setup the CPE interrupt vector */ - { - irq_desc_t *desc; - unsigned int irq; - int cpev = acpi_request_vector(ACPI_INTERRUPT_CPEI); - - if (cpev >= 0) { - for (irq = 0; irq < NR_IRQS; ++irq) - if (irq_to_vector(irq) == cpev) { - desc = irq_descp(irq); - desc->status |= IRQ_PER_CPU; - desc->handler = &irq_type_iosapic_level; - setup_irq(irq, &mca_cpe_irqaction); - } - ia64_mca_register_cpev(cpev); - } - } -#endif - - /* Initialize the areas set aside by the OS to buffer the - * platform/processor error states for MCA/INIT/CMC - * handling. - */ - ia64_log_init(SAL_INFO_TYPE_MCA); - ia64_log_init(SAL_INFO_TYPE_INIT); - ia64_log_init(SAL_INFO_TYPE_CMC); - ia64_log_init(SAL_INFO_TYPE_CPE); - -#if defined(MCA_TEST) - mca_test(); -#endif /* #if defined(MCA_TEST) */ - - printk(KERN_INFO "Mca related initialization done\n"); - - /* commented out because this is done elsewhere */ -#if 0 - /* Do post-failure MCA error logging */ - ia64_mca_check_errors(); -#endif -} - -/* - * ia64_mca_wakeup_ipi_wait - * - * Wait for the inter-cpu interrupt to be sent by the - * monarch processor once it is done with handling the - * MCA. - * - * Inputs : None - * Outputs : None - */ -void -ia64_mca_wakeup_ipi_wait(void) -{ - int irr_num = (IA64_MCA_WAKEUP_VECTOR >> 6); - int irr_bit = (IA64_MCA_WAKEUP_VECTOR & 0x3f); - u64 irr = 0; - - do { - switch(irr_num) { - case 0: - irr = ia64_getreg(_IA64_REG_CR_IRR0); - break; - case 1: - irr = ia64_getreg(_IA64_REG_CR_IRR1); - break; - case 2: - irr = ia64_getreg(_IA64_REG_CR_IRR2); - break; - case 3: - irr = ia64_getreg(_IA64_REG_CR_IRR3); - break; - } - } while (!(irr & (1UL << irr_bit))) ; -} - -/* - * ia64_mca_wakeup - * - * Send an inter-cpu interrupt to wake-up a particular cpu - * and mark that cpu to be out of rendez. - * - * Inputs : cpuid - * Outputs : None - */ -void -ia64_mca_wakeup(int cpu) -{ - platform_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0); - ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; +/* + * ia64_mca_wakeup + * + * Send an inter-cpu interrupt to wake-up a particular cpu + * and mark that cpu to be out of rendez. + * + * Inputs : cpuid + * Outputs : None + */ +static void +ia64_mca_wakeup(int cpu) +{ + platform_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0); + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; } @@ -884,7 +695,7 @@ ia64_mca_wakeup(int cpu) * Inputs : None * Outputs : None */ -void +static void ia64_mca_wakeup_all(void) { int cpu; @@ -909,7 +720,7 @@ ia64_mca_wakeup_all(void) * Inputs : None * Outputs : None */ -irqreturn_t +static irqreturn_t ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) { unsigned long flags; @@ -935,7 +746,6 @@ ia64_mca_rendez_int_handler(int rendez_i return IRQ_HANDLED; } - /* * ia64_mca_wakeup_int_handler * @@ -951,7 +761,7 @@ ia64_mca_rendez_int_handler(int rendez_i * Outputs : None * */ -irqreturn_t +static irqreturn_t ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs) { return IRQ_HANDLED; @@ -971,11 +781,9 @@ ia64_mca_wakeup_int_handler(int wakeup_i * Outputs : None */ -void -ia64_return_to_sal_check(void) +static void +ia64_return_to_sal_check(int recover) { - pal_processor_state_info_t *psp = (pal_processor_state_info_t *) - &ia64_sal_to_os_handoff_state.proc_state_param; /* Copy over some relevant stuff from the sal_to_os_mca_handoff * so that it can be used at the time of os_mca_to_sal_handoff @@ -986,15 +794,10 @@ ia64_return_to_sal_check(void) ia64_os_to_sal_handoff_state.imots_sal_check_ra = ia64_sal_to_os_handoff_state.imsto_sal_check_ra; - /* - * Did we correct the error? At the moment the only error that - * we fix is a TLB error, if any other kind of error occurred - * we must reboot. - */ - if (psp->cc == 1 && psp->bc == 1 && psp->rc == 1 && psp->uc == 1) - ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT; - else + if (recover) ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED; + else + ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT; /* Default = tell SAL to return to same context */ ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT; @@ -1023,16 +826,12 @@ ia64_return_to_sal_check(void) void ia64_mca_ucmc_handler(void) { - int platform_err = 0; + pal_processor_state_info_t *psp = (pal_processor_state_info_t *) + &ia64_sal_to_os_handoff_state.proc_state_param; + int recover = psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc); /* Get the MCA error record and log it */ - platform_err = ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA, 0); - - /* - * Do Platform-specific mca error handling if required. - */ - if (platform_err) - mca_handler_platform(); + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA, 0); /* * Wakeup all the processors which are spinning in the rendezvous @@ -1041,7 +840,7 @@ ia64_mca_ucmc_handler(void) ia64_mca_wakeup_all(); /* Return to SAL */ - ia64_return_to_sal_check(); + ia64_return_to_sal_check(recover); } static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL); @@ -1062,15 +861,15 @@ static DECLARE_WORK(cmc_enable_work, ia6 * Outputs * None */ -irqreturn_t +static irqreturn_t ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) { static unsigned long cmc_history[CMC_HISTORY_LENGTH]; static int index; static spinlock_t cmc_history_lock = SPIN_LOCK_UNLOCKED; - IA64_MCA_DEBUG("ia64_mca_cmc_int_handler: received interrupt vector = %#x on CPU %d\n", - cmc_irq, smp_processor_id()); + IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n", + __FUNCTION__, cmc_irq, smp_processor_id()); /* SAL spec states this should run w/ interrupts enabled */ local_irq_enable(); @@ -1100,7 +899,7 @@ ia64_mca_cmc_int_handler(int cmc_irq, vo * make sure there's a log somewhere that indicates * something is generating more than we can handle. */ - printk(KERN_WARNING "%s: WARNING: Switching to polling CMC handler, error records may be lost\n", __FUNCTION__); + printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n"); mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); @@ -1117,41 +916,6 @@ ia64_mca_cmc_int_handler(int cmc_irq, vo } /* - * IA64_MCA log support - */ -#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */ -#define IA64_MAX_LOG_TYPES 4 /* MCA, INIT, CMC, CPE */ - -typedef struct ia64_state_log_s -{ - spinlock_t isl_lock; - int isl_index; - unsigned long isl_count; - ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */ -} ia64_state_log_t; - -static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; - -#define IA64_LOG_ALLOCATE(it, size) \ - {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \ - (ia64_err_rec_t *)alloc_bootmem(size); \ - ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \ - (ia64_err_rec_t *)alloc_bootmem(size);} -#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock) -#define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s) -#define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s) -#define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index -#define IA64_LOG_CURR_INDEX(it) 1 - ia64_state_log[it].isl_index -#define IA64_LOG_INDEX_INC(it) \ - {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \ - ia64_state_log[it].isl_count++;} -#define IA64_LOG_INDEX_DEC(it) \ - ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index -#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])) -#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])) -#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count - -/* * ia64_mca_cmc_int_caller * * Triggered by sw interrupt from CMC polling routine. Calls @@ -1165,7 +929,7 @@ static ia64_state_log_t ia64_state_log[I * Outputs * handled */ -irqreturn_t +static irqreturn_t ia64_mca_cmc_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs) { static int start_count = -1; @@ -1184,10 +948,10 @@ ia64_mca_cmc_int_caller(int cpe_irq, voi if (cpuid < NR_CPUS) { platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0); } else { - /* If no log recored, switch out of polling mode */ + /* If no log record, switch out of polling mode */ if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) { - printk(KERN_WARNING "%s: Returning to interrupt driven CMC handler\n", __FUNCTION__); + printk(KERN_WARNING "Returning to interrupt driven CMC handler\n"); schedule_work(&cmc_enable_work); cmc_polling_enabled = 0; @@ -1232,7 +996,7 @@ ia64_mca_cmc_poll (unsigned long dummy) * Outputs * handled */ -irqreturn_t +static irqreturn_t ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs) { static int start_count = -1; @@ -1286,59 +1050,26 @@ ia64_mca_cpe_poll (unsigned long dummy) } /* - * ia64_mca_late_init + * C portion of the OS INIT handler * - * Opportunity to setup things that require initialization later - * than ia64_mca_init. Setup a timer to poll for CPEs if the - * platform doesn't support an interrupt driven mechanism. + * Called from ia64_monarch_init_handler + * + * Inputs: pointer to pt_regs where processor info was saved. + * + * Returns: + * 0 if SAL must warm boot the System + * 1 if SAL must return to interrupted context using PAL_MC_RESUME * - * Inputs : None - * Outputs : Status */ -static int __init -ia64_mca_late_init(void) +void +ia64_init_handler (struct pt_regs *pt, struct switch_stack *sw) { - init_timer(&cmc_poll_timer); - cmc_poll_timer.function = ia64_mca_cmc_poll; + pal_min_state_area_t *ms; - /* Reset to the correct state */ - cmc_polling_enabled = 0; + oops_in_progress = 1; /* avoid deadlock in printk, but it makes recovery dodgy */ - init_timer(&cpe_poll_timer); - cpe_poll_timer.function = ia64_mca_cpe_poll; - -#ifdef CONFIG_ACPI - /* If platform doesn't support CPEI, get the timer going. */ - if (acpi_request_vector(ACPI_INTERRUPT_CPEI) < 0 && cpe_poll_enabled) { - register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction); - ia64_mca_cpe_poll(0UL); - } -#endif - - return 0; -} - -device_initcall(ia64_mca_late_init); - -/* - * C portion of the OS INIT handler - * - * Called from ia64_monarch_init_handler - * - * Inputs: pointer to pt_regs where processor info was saved. - * - * Returns: - * 0 if SAL must warm boot the System - * 1 if SAL must return to interrupted context using PAL_MC_RESUME - * - */ -void -ia64_init_handler (struct pt_regs *pt, struct switch_stack *sw) -{ - pal_min_state_area_t *ms; - - printk(KERN_INFO "Entered OS INIT handler. PSP=%lx\n", - ia64_sal_to_os_handoff_state.proc_state_param); + printk(KERN_INFO "Entered OS INIT handler. PSP=%lx\n", + ia64_sal_to_os_handoff_state.proc_state_param); /* * Address of minstate area provided by PAL is physical, @@ -1350,1083 +1081,256 @@ ia64_init_handler (struct pt_regs *pt, s init_handler_platform(ms, pt, sw); /* call platform specific routines */ } -/* - * ia64_log_prt_guid - * - * Print a formatted GUID. - * - * Inputs : p_guid (ptr to the GUID) - * prfunc (print function) - * Outputs : None - * - */ -void -ia64_log_prt_guid (efi_guid_t *p_guid, prfunc_t prfunc) -{ - char out[40]; - printk(KERN_DEBUG "GUID = %s\n", efi_guid_unparse(p_guid, out)); -} - -static void -ia64_log_hexdump(unsigned char *p, unsigned long n_ch, prfunc_t prfunc) +static int __init +ia64_mca_disable_cpe_polling(char *str) { - unsigned long i; - int j; - - if (!p) - return; - - for (i = 0; i < n_ch;) { - prfunc("%p ", (void *)p); - for (j = 0; (j < 16) && (i < n_ch); i++, j++, p++) { - prfunc("%02x ", *p); - } - prfunc("\n"); - } + cpe_poll_enabled = 0; + return 1; } -#ifdef MCA_PRT_XTRA_DATA // for test only @FVL +__setup("disable_cpe_poll", ia64_mca_disable_cpe_polling); -static void -ia64_log_prt_record_header (sal_log_record_header_t *rh, prfunc_t prfunc) -{ - prfunc("SAL RECORD HEADER: Record buffer = %p, header size = %ld\n", - (void *)rh, sizeof(sal_log_record_header_t)); - ia64_log_hexdump((unsigned char *)rh, sizeof(sal_log_record_header_t), - (prfunc_t)prfunc); - prfunc("Total record length = %d\n", rh->len); - ia64_log_prt_guid(&rh->platform_guid, prfunc); - prfunc("End of SAL RECORD HEADER\n"); -} +static struct irqaction cmci_irqaction = { + .handler = ia64_mca_cmc_int_handler, + .flags = SA_INTERRUPT, + .name = "cmc_hndlr" +}; -static void -ia64_log_prt_section_header (sal_log_section_hdr_t *sh, prfunc_t prfunc) -{ - prfunc("SAL SECTION HEADER: Record buffer = %p, header size = %ld\n", - (void *)sh, sizeof(sal_log_section_hdr_t)); - ia64_log_hexdump((unsigned char *)sh, sizeof(sal_log_section_hdr_t), - (prfunc_t)prfunc); - prfunc("Length of section & header = %d\n", sh->len); - ia64_log_prt_guid(&sh->guid, prfunc); - prfunc("End of SAL SECTION HEADER\n"); -} -#endif // MCA_PRT_XTRA_DATA for test only @FVL +static struct irqaction cmcp_irqaction = { + .handler = ia64_mca_cmc_int_caller, + .flags = SA_INTERRUPT, + .name = "cmc_poll" +}; -/* - * ia64_log_init - * Reset the OS ia64 log buffer - * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) - * Outputs : None - */ -void -ia64_log_init(int sal_info_type) -{ - u64 max_size = 0; +static struct irqaction mca_rdzv_irqaction = { + .handler = ia64_mca_rendez_int_handler, + .flags = SA_INTERRUPT, + .name = "mca_rdzv" +}; - IA64_LOG_NEXT_INDEX(sal_info_type) = 0; - IA64_LOG_LOCK_INIT(sal_info_type); +static struct irqaction mca_wkup_irqaction = { + .handler = ia64_mca_wakeup_int_handler, + .flags = SA_INTERRUPT, + .name = "mca_wkup" +}; - // SAL will tell us the maximum size of any error record of this type - max_size = ia64_sal_get_state_info_size(sal_info_type); - if (!max_size) - /* alloc_bootmem() doesn't like zero-sized allocations! */ - return; +#ifdef CONFIG_ACPI +static struct irqaction mca_cpe_irqaction = { + .handler = ia64_mca_cpe_int_handler, + .flags = SA_INTERRUPT, + .name = "cpe_hndlr" +}; - // set up OS data structures to hold error info - IA64_LOG_ALLOCATE(sal_info_type, max_size); - memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size); - memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size); -} +static struct irqaction mca_cpep_irqaction = { + .handler = ia64_mca_cpe_int_caller, + .flags = SA_INTERRUPT, + .name = "cpe_poll" +}; +#endif /* CONFIG_ACPI */ /* - * ia64_log_get + * ia64_mca_init * - * Get the current MCA log from SAL and copy it into the OS log buffer. + * Do all the system level mca specific initialization. * - * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) - * Outputs : size (total record length) - * *buffer (ptr to error record) + * 1. Register spinloop and wakeup request interrupt vectors * - */ -static u64 -ia64_log_get(int sal_info_type, u8 **buffer) -{ - sal_log_record_header_t *log_buffer; - u64 total_len = 0; - int s; - - IA64_LOG_LOCK(sal_info_type); - - /* Get the process state information */ - log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type); - - total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer); - - if (total_len) { - IA64_LOG_INDEX_INC(sal_info_type); - IA64_LOG_UNLOCK(sal_info_type); - IA64_MCA_DEBUG("ia64_log_get: SAL error record type %d retrieved. " - "Record length = %ld\n", sal_info_type, total_len); - *buffer = (u8 *) log_buffer; - return total_len; - } else { - IA64_LOG_UNLOCK(sal_info_type); - return 0; - } -} - -/* - * ia64_log_prt_oem_data + * 2. Register OS_MCA handler entry point * - * Print OEM specific data if included. + * 3. Register OS_INIT handler entry point * - * Inputs : header_len (length passed in section header) - * sect_len (default length of section type) - * p_data (ptr to data) - * prfunc (print function) - * Outputs : None + * 4. Initialize MCA/CMC/INIT related log buffers maintained by the OS. * - */ -void -ia64_log_prt_oem_data (int header_len, int sect_len, u8 *p_data, prfunc_t prfunc) -{ - int oem_data_len, i; - - if ((oem_data_len = header_len - sect_len) > 0) { - prfunc(" OEM Specific Data:"); - for (i = 0; i < oem_data_len; i++, p_data++) - prfunc(" %02x", *p_data); - } - prfunc("\n"); -} - -/* - * ia64_log_rec_header_print + * Note that this initialization is done very early before some kernel + * services are available. * - * Log info from the SAL error record header. + * Inputs : None * - * Inputs : lh * (ptr to SAL log error record header) - * prfunc (fn ptr of log output function to use) * Outputs : None */ -void -ia64_log_rec_header_print (sal_log_record_header_t *lh, prfunc_t prfunc) -{ - prfunc("+Err Record ID: %ld SAL Rev: %2x.%02x\n", lh->id, - lh->revision.major, lh->revision.minor); - prfunc("+Time: %02x/%02x/%02x%02x %02x:%02x:%02x Severity %d\n", - lh->timestamp.slh_month, lh->timestamp.slh_day, - lh->timestamp.slh_century, lh->timestamp.slh_year, - lh->timestamp.slh_hour, lh->timestamp.slh_minute, - lh->timestamp.slh_second, lh->severity); -} - -/* - * ia64_log_processor_regs_print - * Print the contents of the saved processor register(s) in the format - * [] - * - * Inputs : regs (Register save buffer) - * reg_num (# of registers) - * reg_class (application/banked/control/bank1_general) - * reg_prefix (ar/br/cr/b1_gr) - * Outputs : None - * - */ -void -ia64_log_processor_regs_print(u64 *regs, - int reg_num, - char *reg_class, - char *reg_prefix, - prfunc_t prfunc) -{ - int i; - - prfunc("+%s Registers\n", reg_class); - for (i = 0; i < reg_num; i++) - prfunc("+ %s[%d] 0x%lx\n", reg_prefix, i, regs[i]); -} - -/* - * ia64_log_processor_fp_regs_print - * Print the contents of the saved floating page register(s) in the format - * [] - * - * Inputs: ia64_fpreg (Register save buffer) - * reg_num (# of registers) - * reg_class (application/banked/control/bank1_general) - * reg_prefix (ar/br/cr/b1_gr) - * Outputs: None - * - */ -void -ia64_log_processor_fp_regs_print (struct ia64_fpreg *regs, - int reg_num, - char *reg_class, - char *reg_prefix, - prfunc_t prfunc) +void __init +ia64_mca_init(void) { + ia64_fptr_t *mon_init_ptr = (ia64_fptr_t *)ia64_monarch_init_handler; + ia64_fptr_t *slave_init_ptr = (ia64_fptr_t *)ia64_slave_init_handler; + ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch; int i; + s64 rc; + struct ia64_sal_retval isrv; + u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ - prfunc("+%s Registers\n", reg_class); - for (i = 0; i < reg_num; i++) - prfunc("+ %s[%d] 0x%lx%016lx\n", reg_prefix, i, regs[i].u.bits[1], - regs[i].u.bits[0]); -} - -static char *pal_mesi_state[] = { - "Invalid", - "Shared", - "Exclusive", - "Modified", - "Reserved1", - "Reserved2", - "Reserved3", - "Reserved4" -}; - -static char *pal_cache_op[] = { - "Unknown", - "Move in", - "Cast out", - "Coherency check", - "Internal", - "Instruction fetch", - "Implicit Writeback", - "Reserved" -}; - -/* - * ia64_log_cache_check_info_print - * Display the machine check information related to cache error(s). - * Inputs: i (Multiple errors are logged, i - index of logged error) - * cc_info * (Ptr to cache check info logged by the PAL and later - * captured by the SAL) - * prfunc (fn ptr of print function to be used for output) - * Outputs: None - */ -void -ia64_log_cache_check_info_print (int i, - sal_log_mod_error_info_t *cache_check_info, - prfunc_t prfunc) -{ - pal_cache_check_info_t *info; - u64 target_addr; - - if (!cache_check_info->valid.check_info) { - IA64_MCA_DEBUG("ia64_mca_log_print: invalid cache_check_info[%d]\n",i); - return; /* If check info data not valid, skip it */ - } - - info = (pal_cache_check_info_t *)&cache_check_info->check_info; - target_addr = cache_check_info->target_identifier; - - prfunc("+ Cache check info[%d]\n+", i); - prfunc(" Level: L%d,",info->level); - if (info->mv) - prfunc(" Mesi: %s,",pal_mesi_state[info->mesi]); - prfunc(" Index: %d,", info->index); - if (info->ic) - prfunc(" Cache: Instruction,"); - if (info->dc) - prfunc(" Cache: Data,"); - if (info->tl) - prfunc(" Line: Tag,"); - if (info->dl) - prfunc(" Line: Data,"); - prfunc(" Operation: %s,", pal_cache_op[info->op]); - if (info->wiv) - prfunc(" Way: %d,", info->way); - if (cache_check_info->valid.target_identifier) - /* Hope target address is saved in target_identifier */ - if (info->tv) - prfunc(" Target Addr: 0x%lx,", target_addr); - if (info->mcc) - prfunc(" MC: Corrected"); - prfunc("\n"); -} - -/* - * ia64_log_tlb_check_info_print - * Display the machine check information related to tlb error(s). - * Inputs: i (Multiple errors are logged, i - index of logged error) - * tlb_info * (Ptr to machine check info logged by the PAL and later - * captured by the SAL) - * prfunc (fn ptr of print function to be used for output) - * Outputs: None - */ -void -ia64_log_tlb_check_info_print (int i, - sal_log_mod_error_info_t *tlb_check_info, - prfunc_t prfunc) - -{ - pal_tlb_check_info_t *info; - - if (!tlb_check_info->valid.check_info) { - IA64_MCA_DEBUG("ia64_mca_log_print: invalid tlb_check_info[%d]\n", i); - return; /* If check info data not valid, skip it */ - } - - info = (pal_tlb_check_info_t *)&tlb_check_info->check_info; - - prfunc("+ TLB Check Info [%d]\n+", i); - if (info->itc) - prfunc(" Failure: Instruction Translation Cache"); - if (info->dtc) - prfunc(" Failure: Data Translation Cache"); - if (info->itr) { - prfunc(" Failure: Instruction Translation Register"); - prfunc(" ,Slot: %ld", info->tr_slot); - } - if (info->dtr) { - prfunc(" Failure: Data Translation Register"); - prfunc(" ,Slot: %ld", info->tr_slot); - } - if (info->mcc) - prfunc(" ,MC: Corrected"); - prfunc("\n"); -} - -/* - * ia64_log_bus_check_info_print - * Display the machine check information related to bus error(s). - * Inputs: i (Multiple errors are logged, i - index of logged error) - * bus_info * (Ptr to machine check info logged by the PAL and later - * captured by the SAL) - * prfunc (fn ptr of print function to be used for output) - * Outputs: None - */ -void -ia64_log_bus_check_info_print (int i, - sal_log_mod_error_info_t *bus_check_info, - prfunc_t prfunc) -{ - pal_bus_check_info_t *info; - u64 req_addr; /* Address of the requestor of the transaction */ - u64 resp_addr; /* Address of the responder of the transaction */ - u64 targ_addr; /* Address where the data was to be delivered to */ - /* or obtained from */ - - if (!bus_check_info->valid.check_info) { - IA64_MCA_DEBUG("ia64_mca_log_print: invalid bus_check_info[%d]\n", i); - return; /* If check info data not valid, skip it */ - } - - info = (pal_bus_check_info_t *)&bus_check_info->check_info; - req_addr = bus_check_info->requestor_identifier; - resp_addr = bus_check_info->responder_identifier; - targ_addr = bus_check_info->target_identifier; - - prfunc("+ BUS Check Info [%d]\n+", i); - prfunc(" Status Info: %d", info->bsi); - prfunc(" ,Severity: %d", info->sev); - prfunc(" ,Transaction Type: %d", info->type); - prfunc(" ,Transaction Size: %d", info->size); - if (info->cc) - prfunc(" ,Cache-cache-transfer"); - if (info->ib) - prfunc(" ,Error: Internal"); - if (info->eb) - prfunc(" ,Error: External"); - if (info->mcc) - prfunc(" ,MC: Corrected"); - if (info->tv) - prfunc(" ,Target Address: 0x%lx", targ_addr); - if (info->rq) - prfunc(" ,Requestor Address: 0x%lx", req_addr); - if (info->tv) - prfunc(" ,Responder Address: 0x%lx", resp_addr); - prfunc("\n"); -} - -/* - * ia64_log_mem_dev_err_info_print - * - * Format and log the platform memory device error record section data. - * - * Inputs: mem_dev_err_info * (Ptr to memory device error record section - * returned by SAL) - * prfunc (fn ptr of print function to be used for output) - * Outputs: None - */ -void -ia64_log_mem_dev_err_info_print (sal_log_mem_dev_err_info_t *mdei, - prfunc_t prfunc) -{ - prfunc("+ Mem Error Detail: "); - - if (mdei->valid.error_status) - prfunc(" Error Status: %#lx,", mdei->error_status); - if (mdei->valid.physical_addr) - prfunc(" Physical Address: %#lx,", mdei->physical_addr); - if (mdei->valid.addr_mask) - prfunc(" Address Mask: %#lx,", mdei->addr_mask); - if (mdei->valid.node) - prfunc(" Node: %d,", mdei->node); - if (mdei->valid.card) - prfunc(" Card: %d,", mdei->card); - if (mdei->valid.module) - prfunc(" Module: %d,", mdei->module); - if (mdei->valid.bank) - prfunc(" Bank: %d,", mdei->bank); - if (mdei->valid.device) - prfunc(" Device: %d,", mdei->device); - if (mdei->valid.row) - prfunc(" Row: %d,", mdei->row); - if (mdei->valid.column) - prfunc(" Column: %d,", mdei->column); - if (mdei->valid.bit_position) - prfunc(" Bit Position: %d,", mdei->bit_position); - if (mdei->valid.target_id) - prfunc(" ,Target Address: %#lx,", mdei->target_id); - if (mdei->valid.requestor_id) - prfunc(" ,Requestor Address: %#lx,", mdei->requestor_id); - if (mdei->valid.responder_id) - prfunc(" ,Responder Address: %#lx,", mdei->responder_id); - if (mdei->valid.bus_spec_data) - prfunc(" Bus Specific Data: %#lx,", mdei->bus_spec_data); - prfunc("\n"); - - if (mdei->valid.oem_id) { - u8 *p_data = &(mdei->oem_id[0]); - int i; - - prfunc(" OEM Memory Controller ID:"); - for (i = 0; i < 16; i++, p_data++) - prfunc(" %02x", *p_data); - prfunc("\n"); - } - - if (mdei->valid.oem_data) { - platform_mem_dev_err_print((int)mdei->header.len, - (int)sizeof(sal_log_mem_dev_err_info_t) - 1, - &(mdei->oem_data[0]), prfunc); - } -} - -/* - * ia64_log_sel_dev_err_info_print - * - * Format and log the platform SEL device error record section data. - * - * Inputs: sel_dev_err_info * (Ptr to the SEL device error record section - * returned by SAL) - * prfunc (fn ptr of print function to be used for output) - * Outputs: None - */ -void -ia64_log_sel_dev_err_info_print (sal_log_sel_dev_err_info_t *sdei, - prfunc_t prfunc) -{ - int i; - - prfunc("+ SEL Device Error Detail: "); - - if (sdei->valid.record_id) - prfunc(" Record ID: %#x", sdei->record_id); - if (sdei->valid.record_type) - prfunc(" Record Type: %#x", sdei->record_type); - prfunc(" Time Stamp: "); - for (i = 0; i < 4; i++) - prfunc("%1d", sdei->timestamp[i]); - if (sdei->valid.generator_id) - prfunc(" Generator ID: %#x", sdei->generator_id); - if (sdei->valid.evm_rev) - prfunc(" Message Format Version: %#x", sdei->evm_rev); - if (sdei->valid.sensor_type) - prfunc(" Sensor Type: %#x", sdei->sensor_type); - if (sdei->valid.sensor_num) - prfunc(" Sensor Number: %#x", sdei->sensor_num); - if (sdei->valid.event_dir) - prfunc(" Event Direction Type: %#x", sdei->event_dir); - if (sdei->valid.event_data1) - prfunc(" Data1: %#x", sdei->event_data1); - if (sdei->valid.event_data2) - prfunc(" Data2: %#x", sdei->event_data2); - if (sdei->valid.event_data3) - prfunc(" Data3: %#x", sdei->event_data3); - prfunc("\n"); - -} - -/* - * ia64_log_pci_bus_err_info_print - * - * Format and log the platform PCI bus error record section data. - * - * Inputs: pci_bus_err_info * (Ptr to the PCI bus error record section - * returned by SAL) - * prfunc (fn ptr of print function to be used for output) - * Outputs: None - */ -void -ia64_log_pci_bus_err_info_print (sal_log_pci_bus_err_info_t *pbei, - prfunc_t prfunc) -{ - prfunc("+ PCI Bus Error Detail: "); - - if (pbei->valid.err_status) - prfunc(" Error Status: %#lx", pbei->err_status); - if (pbei->valid.err_type) - prfunc(" Error Type: %#x", pbei->err_type); - if (pbei->valid.bus_id) - prfunc(" Bus ID: %#x", pbei->bus_id); - if (pbei->valid.bus_address) - prfunc(" Bus Address: %#lx", pbei->bus_address); - if (pbei->valid.bus_data) - prfunc(" Bus Data: %#lx", pbei->bus_data); - if (pbei->valid.bus_cmd) - prfunc(" Bus Command: %#lx", pbei->bus_cmd); - if (pbei->valid.requestor_id) - prfunc(" Requestor ID: %#lx", pbei->requestor_id); - if (pbei->valid.responder_id) - prfunc(" Responder ID: %#lx", pbei->responder_id); - if (pbei->valid.target_id) - prfunc(" Target ID: %#lx", pbei->target_id); - if (pbei->valid.oem_data) - prfunc("\n"); - - if (pbei->valid.oem_data) { - platform_pci_bus_err_print((int)pbei->header.len, - (int)sizeof(sal_log_pci_bus_err_info_t) - 1, - &(pbei->oem_data[0]), prfunc); - } -} - -/* - * ia64_log_smbios_dev_err_info_print - * - * Format and log the platform SMBIOS device error record section data. - * - * Inputs: smbios_dev_err_info * (Ptr to the SMBIOS device error record - * section returned by SAL) - * prfunc (fn ptr of print function to be used for output) - * Outputs: None - */ -void -ia64_log_smbios_dev_err_info_print (sal_log_smbios_dev_err_info_t *sdei, - prfunc_t prfunc) -{ - u8 i; - - prfunc("+ SMBIOS Device Error Detail: "); + IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__); - if (sdei->valid.event_type) - prfunc(" Event Type: %#x", sdei->event_type); - if (sdei->valid.time_stamp) { - prfunc(" Time Stamp: "); - for (i = 0; i < 6; i++) - prfunc("%d", sdei->time_stamp[i]); - } - if ((sdei->valid.data) && (sdei->valid.length)) { - prfunc(" Data: "); - for (i = 0; i < sdei->length; i++) - prfunc(" %02x", sdei->data[i]); - } - prfunc("\n"); -} + /* Clear the Rendez checkin flag for all cpus */ + for(i = 0 ; i < NR_CPUS; i++) + ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; -/* - * ia64_log_pci_comp_err_info_print - * - * Format and log the platform PCI component error record section data. - * - * Inputs: pci_comp_err_info * (Ptr to the PCI component error record section - * returned by SAL) - * prfunc (fn ptr of print function to be used for output) - * Outputs: None - */ -void -ia64_log_pci_comp_err_info_print(sal_log_pci_comp_err_info_t *pcei, - prfunc_t prfunc) -{ - u32 n_mem_regs, n_io_regs; - u64 i, n_pci_data; - u64 *p_reg_data; - u8 *p_oem_data; - - prfunc("+ PCI Component Error Detail: "); - - if (pcei->valid.err_status) - prfunc(" Error Status: %#lx\n", pcei->err_status); - if (pcei->valid.comp_info) - prfunc(" Component Info: Vendor Id = %#x, Device Id = %#x," - " Class Code = %#x, Seg/Bus/Dev/Func = %d/%d/%d/%d\n", - pcei->comp_info.vendor_id, pcei->comp_info.device_id, - pcei->comp_info.class_code, pcei->comp_info.seg_num, - pcei->comp_info.bus_num, pcei->comp_info.dev_num, - pcei->comp_info.func_num); - - n_mem_regs = (pcei->valid.num_mem_regs) ? pcei->num_mem_regs : 0; - n_io_regs = (pcei->valid.num_io_regs) ? pcei->num_io_regs : 0; - p_reg_data = &(pcei->reg_data_pairs[0]); - p_oem_data = (u8 *)p_reg_data + - (n_mem_regs + n_io_regs) * 2 * sizeof(u64); - n_pci_data = p_oem_data - (u8 *)pcei; - - if (n_pci_data > pcei->header.len) { - prfunc(" Invalid PCI Component Error Record format: length = %ld, " - " Size PCI Data = %d, Num Mem-Map/IO-Map Regs = %ld/%ld\n", - pcei->header.len, n_pci_data, n_mem_regs, n_io_regs); - return; - } + /* + * Register the rendezvous spinloop and wakeup mechanism with SAL + */ - if (n_mem_regs) { - prfunc(" Memory Mapped Registers\n Address \tValue\n"); - for (i = 0; i < pcei->num_mem_regs; i++) { - prfunc(" %#lx %#lx\n", p_reg_data[0], p_reg_data[1]); - p_reg_data += 2; - } - } - if (n_io_regs) { - prfunc(" I/O Mapped Registers\n Address \tValue\n"); - for (i = 0; i < pcei->num_io_regs; i++) { - prfunc(" %#lx %#lx\n", p_reg_data[0], p_reg_data[1]); - p_reg_data += 2; + /* Register the rendezvous interrupt vector with SAL */ + while (1) { + isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT, + SAL_MC_PARAM_MECHANISM_INT, + IA64_MCA_RENDEZ_VECTOR, + timeout, + SAL_MC_PARAM_RZ_ALWAYS); + rc = isrv.status; + if (rc == 0) + break; + if (rc == -2) { + printk(KERN_INFO "Increasing MCA rendezvous timeout from " + "%ld to %ld milliseconds\n", timeout, isrv.v0); + timeout = isrv.v0; + continue; } + printk(KERN_ERR "Failed to register rendezvous interrupt " + "with SAL (status %ld)\n", rc); + return; } - if (pcei->valid.oem_data) { - platform_pci_comp_err_print((int)pcei->header.len, n_pci_data, - p_oem_data, prfunc); - prfunc("\n"); - } -} - -/* - * ia64_log_plat_specific_err_info_print - * - * Format and log the platform specifie error record section data. - * - * Inputs: sel_dev_err_info * (Ptr to the platform specific error record - * section returned by SAL) - * prfunc (fn ptr of print function to be used for output) - * Outputs: None - */ -void -ia64_log_plat_specific_err_info_print (sal_log_plat_specific_err_info_t *psei, - prfunc_t prfunc) -{ - prfunc("+ Platform Specific Error Detail: "); - - if (psei->valid.err_status) - prfunc(" Error Status: %#lx", psei->err_status); - if (psei->valid.guid) { - prfunc(" GUID: "); - ia64_log_prt_guid(&psei->guid, prfunc); - } - if (psei->valid.oem_data) { - platform_plat_specific_err_print((int) psei->header.len, - (char *) psei->oem_data - (char *) psei, - &psei->oem_data[0], prfunc); - } - prfunc("\n"); -} - -/* - * ia64_log_host_ctlr_err_info_print - * - * Format and log the platform host controller error record section data. - * - * Inputs: host_ctlr_err_info * (Ptr to the host controller error record - * section returned by SAL) - * prfunc (fn ptr of print function to be used for output) - * Outputs: None - */ -void -ia64_log_host_ctlr_err_info_print (sal_log_host_ctlr_err_info_t *hcei, - prfunc_t prfunc) -{ - prfunc("+ Host Controller Error Detail: "); - - if (hcei->valid.err_status) - prfunc(" Error Status: %#lx", hcei->err_status); - if (hcei->valid.requestor_id) - prfunc(" Requestor ID: %#lx", hcei->requestor_id); - if (hcei->valid.responder_id) - prfunc(" Responder ID: %#lx", hcei->responder_id); - if (hcei->valid.target_id) - prfunc(" Target ID: %#lx", hcei->target_id); - if (hcei->valid.bus_spec_data) - prfunc(" Bus Specific Data: %#lx", hcei->bus_spec_data); - if (hcei->valid.oem_data) { - platform_host_ctlr_err_print((int)hcei->header.len, - (int)sizeof(sal_log_host_ctlr_err_info_t) - 1, - &(hcei->oem_data[0]), prfunc); - } - prfunc("\n"); -} -/* - * ia64_log_plat_bus_err_info_print - * - * Format and log the platform bus error record section data. - * - * Inputs: plat_bus_err_info * (Ptr to the platform bus error record section - * returned by SAL) - * prfunc (fn ptr of print function to be used for output) - * Outputs: None - */ -void -ia64_log_plat_bus_err_info_print (sal_log_plat_bus_err_info_t *pbei, - prfunc_t prfunc) -{ - prfunc("+ Platform Bus Error Detail: "); - - if (pbei->valid.err_status) - prfunc(" Error Status: %#lx", pbei->err_status); - if (pbei->valid.requestor_id) - prfunc(" Requestor ID: %#lx", pbei->requestor_id); - if (pbei->valid.responder_id) - prfunc(" Responder ID: %#lx", pbei->responder_id); - if (pbei->valid.target_id) - prfunc(" Target ID: %#lx", pbei->target_id); - if (pbei->valid.bus_spec_data) - prfunc(" Bus Specific Data: %#lx", pbei->bus_spec_data); - if (pbei->valid.oem_data) { - platform_plat_bus_err_print((int)pbei->header.len, - (int)sizeof(sal_log_plat_bus_err_info_t) - 1, - &(pbei->oem_data[0]), prfunc); + /* Register the wakeup interrupt vector with SAL */ + isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP, + SAL_MC_PARAM_MECHANISM_INT, + IA64_MCA_WAKEUP_VECTOR, + 0, 0); + rc = isrv.status; + if (rc) { + printk(KERN_ERR "Failed to register wakeup interrupt with SAL " + "(status %ld)\n", rc); + return; } - prfunc("\n"); -} -/* - * ia64_log_proc_dev_err_info_print - * - * Display the processor device error record. - * - * Inputs: sal_log_processor_info_t * (Ptr to processor device error record - * section body). - * prfunc (fn ptr of print function to be used - * for output). - * Outputs: None - */ -void -ia64_log_proc_dev_err_info_print (sal_log_processor_info_t *slpi, - prfunc_t prfunc) -{ -#ifdef MCA_PRT_XTRA_DATA - size_t d_len = slpi->header.len - sizeof(sal_log_section_hdr_t); -#endif - sal_processor_static_info_t *spsi; - int i; - sal_log_mod_error_info_t *p_data; + IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __FUNCTION__); - prfunc("+Processor Device Error Info Section\n"); + ia64_mc_info.imi_mca_handler = ia64_tpa(mca_hldlr_ptr->fp); + /* + * XXX - disable SAL checksum by setting size to 0; should be + * ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch); + */ + ia64_mc_info.imi_mca_handler_size = 0; -#ifdef MCA_PRT_XTRA_DATA // for test only @FVL + /* Register the os mca handler with SAL */ + if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, + ia64_mc_info.imi_mca_handler, + ia64_tpa(mca_hldlr_ptr->gp), + ia64_mc_info.imi_mca_handler_size, + 0, 0, 0))) { - char *p_data = (char *)&slpi->valid; - - prfunc("SAL_PROC_DEV_ERR SECTION DATA: Data buffer = %p, " - "Data size = %ld\n", (void *)p_data, d_len); - ia64_log_hexdump(p_data, d_len, prfunc); - prfunc("End of SAL_PROC_DEV_ERR SECTION DATA\n"); + printk(KERN_ERR "Failed to register OS MCA handler with SAL " + "(status %ld)\n", rc); + return; } -#endif // MCA_PRT_XTRA_DATA for test only @FVL - - if (slpi->valid.proc_error_map) - prfunc(" Processor Error Map: %#lx\n", slpi->proc_error_map); - - if (slpi->valid.proc_state_param) - prfunc(" Processor State Param: %#lx\n", slpi->proc_state_parameter); - if (slpi->valid.proc_cr_lid) - prfunc(" Processor LID: %#lx\n", slpi->proc_cr_lid); + IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __FUNCTION__, + ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp)); /* - * Note: March 2001 SAL spec states that if the number of elements in any - * of the MOD_ERROR_INFO_STRUCT arrays is zero, the entire array is - * absent. Also, current implementations only allocate space for number of - * elements used. So we walk the data pointer from here on. + * XXX - disable SAL checksum by setting size to 0, should be + * size of the actual init handler in mca_asm.S. */ - p_data = &slpi->info[0]; - - /* Print the cache check information if any*/ - for (i = 0 ; i < slpi->valid.num_cache_check; i++, p_data++) - ia64_log_cache_check_info_print(i, p_data, prfunc); - - /* Print the tlb check information if any*/ - for (i = 0 ; i < slpi->valid.num_tlb_check; i++, p_data++) - ia64_log_tlb_check_info_print(i, p_data, prfunc); - - /* Print the bus check information if any*/ - for (i = 0 ; i < slpi->valid.num_bus_check; i++, p_data++) - ia64_log_bus_check_info_print(i, p_data, prfunc); - - /* Print the reg file check information if any*/ - for (i = 0 ; i < slpi->valid.num_reg_file_check; i++, p_data++) - ia64_log_hexdump((u8 *)p_data, sizeof(sal_log_mod_error_info_t), - prfunc); /* Just hex dump for now */ - - /* Print the ms check information if any*/ - for (i = 0 ; i < slpi->valid.num_ms_check; i++, p_data++) - ia64_log_hexdump((u8 *)p_data, sizeof(sal_log_mod_error_info_t), - prfunc); /* Just hex dump for now */ - - /* Print CPUID registers if any*/ - if (slpi->valid.cpuid_info) { - u64 *p = (u64 *)p_data; + ia64_mc_info.imi_monarch_init_handler = ia64_tpa(mon_init_ptr->fp); + ia64_mc_info.imi_monarch_init_handler_size = 0; + ia64_mc_info.imi_slave_init_handler = ia64_tpa(slave_init_ptr->fp); + ia64_mc_info.imi_slave_init_handler_size = 0; - prfunc(" CPUID Regs: %#lx %#lx %#lx %#lx\n", p[0], p[1], p[2], p[3]); - p_data++; - } + IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__, + ia64_mc_info.imi_monarch_init_handler); - /* Print processor static info if any */ - if (slpi->valid.psi_static_struct) { - spsi = (sal_processor_static_info_t *)p_data; - - /* Print branch register contents if valid */ - if (spsi->valid.br) - ia64_log_processor_regs_print(spsi->br, 8, "Branch", "br", - prfunc); - - /* Print control register contents if valid */ - if (spsi->valid.cr) - ia64_log_processor_regs_print(spsi->cr, 128, "Control", "cr", - prfunc); - - /* Print application register contents if valid */ - if (spsi->valid.ar) - ia64_log_processor_regs_print(spsi->ar, 128, "Application", - "ar", prfunc); - - /* Print region register contents if valid */ - if (spsi->valid.rr) - ia64_log_processor_regs_print(spsi->rr, 8, "Region", "rr", - prfunc); - - /* Print floating-point register contents if valid */ - if (spsi->valid.fr) - ia64_log_processor_fp_regs_print(spsi->fr, 128, "Floating-point", "fr", - prfunc); + /* Register the os init handler with SAL */ + if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, + ia64_mc_info.imi_monarch_init_handler, + ia64_tpa(ia64_getreg(_IA64_REG_GP)), + ia64_mc_info.imi_monarch_init_handler_size, + ia64_mc_info.imi_slave_init_handler, + ia64_tpa(ia64_getreg(_IA64_REG_GP)), + ia64_mc_info.imi_slave_init_handler_size))) + { + printk(KERN_ERR "Failed to register m/s INIT handlers with SAL " + "(status %ld)\n", rc); + return; } -} -/* - * ia64_log_processor_info_print - * - * Display the processor-specific information logged by PAL as a part - * of MCA or INIT or CMC. - * - * Inputs : lh (Pointer of the sal log header which specifies the - * format of SAL state info as specified by the SAL spec). - * prfunc (fn ptr of print function to be used for output). - * Outputs : None - */ -void -ia64_log_processor_info_print(sal_log_record_header_t *lh, prfunc_t prfunc) -{ - sal_log_section_hdr_t *slsh; - int n_sects; - u32 ercd_pos; + IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__); - if (!lh) - return; + /* + * Configure the CMCI/P vector and handler. Interrupts for CMC are + * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c). + */ + register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction); + register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction); + ia64_mca_cmc_vector_setup(); /* Setup vector on BSP & enable */ -#ifdef MCA_PRT_XTRA_DATA // for test only @FVL - ia64_log_prt_record_header(lh, prfunc); -#endif // MCA_PRT_XTRA_DATA for test only @FVL - - if ((ercd_pos = sizeof(sal_log_record_header_t)) >= lh->len) { - IA64_MCA_DEBUG("ia64_mca_log_print: " - "truncated SAL CMC error record. len = %d\n", - lh->len); - return; - } + /* Setup the MCA rendezvous interrupt vector */ + register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction); - /* Print record header info */ - ia64_log_rec_header_print(lh, prfunc); + /* Setup the MCA wakeup interrupt vector */ + register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction); - for (n_sects = 0; (ercd_pos < lh->len); n_sects++, ercd_pos += slsh->len) { - /* point to next section header */ - slsh = (sal_log_section_hdr_t *)((char *)lh + ercd_pos); - -#ifdef MCA_PRT_XTRA_DATA // for test only @FVL - ia64_log_prt_section_header(slsh, prfunc); -#endif // MCA_PRT_XTRA_DATA for test only @FVL +#ifdef CONFIG_ACPI + /* Setup the CPE interrupt vector */ + { + irq_desc_t *desc; + unsigned int irq; + int cpev = acpi_request_vector(ACPI_INTERRUPT_CPEI); - if (verify_guid(&slsh->guid, &(SAL_PROC_DEV_ERR_SECT_GUID))) { - IA64_MCA_DEBUG("ia64_mca_log_print: unsupported record section\n"); - continue; + if (cpev >= 0) { + for (irq = 0; irq < NR_IRQS; ++irq) + if (irq_to_vector(irq) == cpev) { + desc = irq_descp(irq); + desc->status |= IRQ_PER_CPU; + desc->handler = &irq_type_iosapic_level; + setup_irq(irq, &mca_cpe_irqaction); + } + ia64_mca_register_cpev(cpev); } - - /* - * Now process processor device error record section - */ - ia64_log_proc_dev_err_info_print((sal_log_processor_info_t *)slsh, printk); } +#endif - IA64_MCA_DEBUG("ia64_mca_log_print: " - "found %d sections in SAL CMC error record. len = %d\n", - n_sects, lh->len); - if (!n_sects) { - prfunc("No Processor Device Error Info Section found\n"); - return; - } + /* Initialize the areas set aside by the OS to buffer the + * platform/processor error states for MCA/INIT/CMC + * handling. + */ + ia64_log_init(SAL_INFO_TYPE_MCA); + ia64_log_init(SAL_INFO_TYPE_INIT); + ia64_log_init(SAL_INFO_TYPE_CMC); + ia64_log_init(SAL_INFO_TYPE_CPE); + + printk(KERN_INFO "MCA related initialization done\n"); } /* - * ia64_log_platform_info_print + * ia64_mca_late_init * - * Format and Log the SAL Platform Error Record. + * Opportunity to setup things that require initialization later + * than ia64_mca_init. Setup a timer to poll for CPEs if the + * platform doesn't support an interrupt driven mechanism. * - * Inputs : lh (Pointer to the sal error record header with format - * specified by the SAL spec). - * prfunc (fn ptr of log output function to use) - * Outputs : platform error status + * Inputs : None + * Outputs : Status */ -int -ia64_log_platform_info_print (sal_log_record_header_t *lh, prfunc_t prfunc) +static int __init +ia64_mca_late_init(void) { - sal_log_section_hdr_t *slsh; - int n_sects; - u32 ercd_pos; - int platform_err = 0; - - if (!lh) - return platform_err; - -#ifdef MCA_PRT_XTRA_DATA // for test only @FVL - ia64_log_prt_record_header(lh, prfunc); -#endif // MCA_PRT_XTRA_DATA for test only @FVL - - if ((ercd_pos = sizeof(sal_log_record_header_t)) >= lh->len) { - IA64_MCA_DEBUG("ia64_mca_log_print: " - "truncated SAL error record. len = %d\n", - lh->len); - return platform_err; - } - - /* Print record header info */ - ia64_log_rec_header_print(lh, prfunc); - - for (n_sects = 0; (ercd_pos < lh->len); n_sects++, ercd_pos += slsh->len) { - /* point to next section header */ - slsh = (sal_log_section_hdr_t *)((char *)lh + ercd_pos); - -#ifdef MCA_PRT_XTRA_DATA // for test only @FVL - ia64_log_prt_section_header(slsh, prfunc); - - if (efi_guidcmp(slsh->guid, SAL_PROC_DEV_ERR_SECT_GUID) != 0) { - size_t d_len = slsh->len - sizeof(sal_log_section_hdr_t); - char *p_data = (char *)&((sal_log_mem_dev_err_info_t *)slsh)->valid; - - prfunc("Start of Platform Err Data Section: Data buffer = %p, " - "Data size = %ld\n", (void *)p_data, d_len); - ia64_log_hexdump(p_data, d_len, prfunc); - prfunc("End of Platform Err Data Section\n"); - } -#endif // MCA_PRT_XTRA_DATA for test only @FVL - - /* - * Now process CPE error record section - */ - if (efi_guidcmp(slsh->guid, SAL_PROC_DEV_ERR_SECT_GUID) == 0) { - ia64_log_proc_dev_err_info_print((sal_log_processor_info_t *)slsh, - prfunc); - } else if (efi_guidcmp(slsh->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) { - platform_err = 1; - prfunc("+Platform Memory Device Error Info Section\n"); - ia64_log_mem_dev_err_info_print((sal_log_mem_dev_err_info_t *)slsh, - prfunc); - } else if (efi_guidcmp(slsh->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID) == 0) { - platform_err = 1; - prfunc("+Platform SEL Device Error Info Section\n"); - ia64_log_sel_dev_err_info_print((sal_log_sel_dev_err_info_t *)slsh, - prfunc); - } else if (efi_guidcmp(slsh->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID) == 0) { - platform_err = 1; - prfunc("+Platform PCI Bus Error Info Section\n"); - ia64_log_pci_bus_err_info_print((sal_log_pci_bus_err_info_t *)slsh, - prfunc); - } else if (efi_guidcmp(slsh->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID) == 0) { - platform_err = 1; - prfunc("+Platform SMBIOS Device Error Info Section\n"); - ia64_log_smbios_dev_err_info_print((sal_log_smbios_dev_err_info_t *)slsh, - prfunc); - } else if (efi_guidcmp(slsh->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID) == 0) { - platform_err = 1; - prfunc("+Platform PCI Component Error Info Section\n"); - ia64_log_pci_comp_err_info_print((sal_log_pci_comp_err_info_t *)slsh, - prfunc); - } else if (efi_guidcmp(slsh->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID) == 0) { - platform_err = 1; - prfunc("+Platform Specific Error Info Section\n"); - ia64_log_plat_specific_err_info_print((sal_log_plat_specific_err_info_t *) - slsh, - prfunc); - } else if (efi_guidcmp(slsh->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID) == 0) { - platform_err = 1; - prfunc("+Platform Host Controller Error Info Section\n"); - ia64_log_host_ctlr_err_info_print((sal_log_host_ctlr_err_info_t *)slsh, - prfunc); - } else if (efi_guidcmp(slsh->guid, SAL_PLAT_BUS_ERR_SECT_GUID) == 0) { - platform_err = 1; - prfunc("+Platform Bus Error Info Section\n"); - ia64_log_plat_bus_err_info_print((sal_log_plat_bus_err_info_t *)slsh, - prfunc); - } else { - IA64_MCA_DEBUG("ia64_mca_log_print: unsupported record section\n"); - continue; - } - } + init_timer(&cmc_poll_timer); + cmc_poll_timer.function = ia64_mca_cmc_poll; - IA64_MCA_DEBUG("ia64_mca_log_print: found %d sections in SAL error record. len = %d\n", - n_sects, lh->len); - if (!n_sects) { - prfunc("No Platform Error Info Sections found\n"); - return platform_err; - } - return platform_err; -} + /* Reset to the correct state */ + cmc_polling_enabled = 0; -/* - * ia64_log_print - * - * Displays the contents of the OS error log information - * - * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) - * prfunc (fn ptr of log output function to use) - * Outputs : platform error status - */ -int -ia64_log_print(int sal_info_type, prfunc_t prfunc) -{ - int platform_err = 0; + init_timer(&cpe_poll_timer); + cpe_poll_timer.function = ia64_mca_cpe_poll; - switch(sal_info_type) { - case SAL_INFO_TYPE_MCA: - prfunc("+CPU %d: SAL log contains MCA error record\n", smp_processor_id()); - ia64_log_rec_header_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc); - break; - case SAL_INFO_TYPE_INIT: - prfunc("+CPU %d: SAL log contains INIT error record\n", smp_processor_id()); - ia64_log_rec_header_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc); - break; - case SAL_INFO_TYPE_CMC: - prfunc("+BEGIN HARDWARE ERROR STATE AT CMC\n"); - ia64_log_processor_info_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc); - prfunc("+END HARDWARE ERROR STATE AT CMC\n"); - break; - case SAL_INFO_TYPE_CPE: - prfunc("+BEGIN HARDWARE ERROR STATE AT CPE\n"); - ia64_log_platform_info_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc); - prfunc("+END HARDWARE ERROR STATE AT CPE\n"); - break; - default: - prfunc("+MCA UNKNOWN ERROR LOG (UNIMPLEMENTED)\n"); - break; +#ifdef CONFIG_ACPI + /* If platform doesn't support CPEI, get the timer going. */ + if (acpi_request_vector(ACPI_INTERRUPT_CPEI) < 0 && cpe_poll_enabled) { + register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction); + ia64_mca_cpe_poll(0UL); } - return platform_err; -} +#endif -static int __init -ia64_mca_disable_cpe_polling(char *str) -{ - cpe_poll_enabled = 0; - return 1; + return 0; } -__setup("disable_cpe_poll", ia64_mca_disable_cpe_polling); +device_initcall(ia64_mca_late_init); --- linux-2.6.3-rc2/arch/ia64/kernel/salinfo.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/ia64/kernel/salinfo.c 2004-02-12 00:39:25.000000000 -0800 @@ -16,6 +16,9 @@ * Cache the record across multi-block reads from user space. * Support > 64 cpus. * Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module. + * + * Jan 28 2004 kaos@sgi.com + * Periodically check for outstanding MCA or INIT records. */ #include @@ -23,6 +26,7 @@ #include #include #include +#include #include #include @@ -179,6 +183,8 @@ shift1_data_saved (struct salinfo_data * /* This routine is invoked in interrupt context. Note: mca.c enables * interrupts before calling this code for CMC/CPE. MCA and INIT events are * not irq safe, do not call any routines that use spinlocks, they may deadlock. + * MCA and INIT records are recorded, a timer event will look for any + * outstanding events and wake up the user space code. * * The buffer passed from mca.c points to the output from ia64_log_get. This is * a persistent buffer but its contents can change between the interrupt and @@ -186,12 +192,12 @@ shift1_data_saved (struct salinfo_data * * changes. */ void -salinfo_log_wakeup(int type, u8 *buffer, u64 size) +salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe) { struct salinfo_data *data = salinfo_data + type; struct salinfo_data_saved *data_saved; unsigned long flags = 0; - int i, irqsafe = type != SAL_INFO_TYPE_MCA && type != SAL_INFO_TYPE_INIT; + int i; int saved_size = ARRAY_SIZE(data->data_saved); BUG_ON(type >= ARRAY_SIZE(salinfo_log_name)); @@ -224,6 +230,35 @@ salinfo_log_wakeup(int type, u8 *buffer, } } +/* Check for outstanding MCA/INIT records every 5 minutes (arbitrary) */ +#define SALINFO_TIMER_DELAY (5*60*HZ) +static struct timer_list salinfo_timer; + +static void +salinfo_timeout_check(struct salinfo_data *data) +{ + int i; + if (!data->open) + return; + for (i = 0; i < NR_CPUS; ++i) { + if (test_bit(i, &data->cpu_event)) { + /* double up() is not a problem, user space will see no + * records for the additional "events". + */ + up(&data->sem); + } + } +} + +static void +salinfo_timeout (unsigned long arg) +{ + salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); + salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); + salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; + add_timer(&salinfo_timer); +} + static int salinfo_event_open(struct inode *inode, struct file *file) { @@ -563,6 +598,11 @@ salinfo_init(void) *sdir++ = salinfo_dir; + init_timer(&salinfo_timer); + salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; + salinfo_timer.function = &salinfo_timeout; + add_timer(&salinfo_timer); + return 0; } --- linux-2.6.3-rc2/arch/ia64/kernel/smpboot.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/kernel/smpboot.c 2004-02-12 00:39:25.000000000 -0800 @@ -77,7 +77,6 @@ extern void __init calibrate_delay (void extern void start_ap (void); extern unsigned long ia64_iobase; -int cpucount; task_t *task_for_booting_cpu; /* Bitmask of currently online CPUs */ --- linux-2.6.3-rc2/arch/ia64/kernel/sys_ia64.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/ia64/kernel/sys_ia64.c 2004-02-12 00:40:54.000000000 -0800 @@ -15,6 +15,7 @@ #include /* doh, must come after sched.h... */ #include #include +#include #include #include @@ -74,7 +75,6 @@ arch_get_unmapped_area (struct file *fil asmlinkage long ia64_getpriority (int which, int who) { - extern long sys_getpriority (int, int); long prio; prio = sys_getpriority(which, who); --- linux-2.6.3-rc2/arch/ia64/kernel/traps.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/ia64/kernel/traps.c 2004-02-12 00:39:25.000000000 -0800 @@ -46,21 +46,14 @@ register double f30 asm ("f30"); registe extern spinlock_t timerlist_lock; -static fpswa_interface_t *fpswa_interface; +fpswa_interface_t *fpswa_interface; void __init trap_init (void) { - int major = 0, minor = 0; - - if (ia64_boot_param->fpswa) { + if (ia64_boot_param->fpswa) /* FPSWA fixup: make the interface pointer a kernel virtual address: */ fpswa_interface = __va(ia64_boot_param->fpswa); - major = fpswa_interface->revision >> 16; - minor = fpswa_interface->revision & 0xffff; - } - printk(KERN_INFO "fpswa interface at %lx (rev %d.%d)\n", - ia64_boot_param->fpswa, major, minor); } /* --- linux-2.6.3-rc2/arch/ia64/kernel/unaligned.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/kernel/unaligned.c 2004-02-12 00:39:25.000000000 -0800 @@ -740,6 +740,7 @@ static int emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs) { unsigned int len = 1 << ld.x6_sz; + unsigned long val = 0; /* * r0, as target, doesn't need to be checked because Illegal Instruction @@ -750,21 +751,18 @@ emulate_load_int (unsigned long ifa, loa */ /* - * ldX.a we don't try to emulate anything but we must invalidate the ALAT entry. + * ldX.a we will emulate load and also invalidate the ALAT entry. * See comment below for explanation on how we handle ldX.a */ - if (ld.x6_op != 0x2) { - unsigned long val = 0; - if (len != 2 && len != 4 && len != 8) { - DPRINT("unknown size: x6=%d\n", ld.x6_sz); - return -1; - } - /* this assumes little-endian byte-order: */ - if (copy_from_user(&val, (void *) ifa, len)) - return -1; - setreg(ld.r1, val, 0, regs); + if (len != 2 && len != 4 && len != 8) { + DPRINT("unknown size: x6=%d\n", ld.x6_sz); + return -1; } + /* this assumes little-endian byte-order: */ + if (copy_from_user(&val, (void *) ifa, len)) + return -1; + setreg(ld.r1, val, 0, regs); /* * check for updates on any kind of loads @@ -817,7 +815,7 @@ emulate_load_int (unsigned long ifa, loa * store & shift to temporary; * r1=temporary * - * So int this case, you would get the right value is r1 but the wrong info in + * So in this case, you would get the right value is r1 but the wrong info in * the ALAT. Notice that you could do it in reverse to finish with address 3 * but you would still get the size wrong. To get the size right, one needs to * execute exactly the same kind of load. You could do it from a aligned @@ -826,9 +824,12 @@ emulate_load_int (unsigned long ifa, loa * So no matter what, it is not possible to emulate an advanced load * correctly. But is that really critical ? * + * We will always convert ld.a into a normal load with ALAT invalidated. This + * will enable compiler to do optimization where certain code path after ld.a + * is not required to have ld.c/chk.a, e.g., code path with no intervening stores. * - * Now one has to look at how ld.a is used, one must either do a ld.c.* or - * chck.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no + * If there is a store after the advanced load, one must either do a ld.c.* or + * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no * entry found in ALAT), and that's perfectly ok because: * * - ld.c.*, if the entry is not present a normal load is executed @@ -836,19 +837,8 @@ emulate_load_int (unsigned long ifa, loa * * In either case, the load can be potentially retried in another form. * - * So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT - * must be invalidated for the register (so that's chck.a.*,ld.c.* don't pick up - * a stale entry later) The register base update MUST also be performed. - * - * Now what is the content of the register and its NaT bit in the case we don't - * do the load ? EAS2.4, says (in case an actual load is needed) - * - * - r1 = [r3], Nat = 0 if succeeds - * - r1 = 0 Nat = 0 if trying to access non-speculative memory - * - * For us, there is nothing to do, because both ld.c.* and chk.a.* are going to - * retry and thus eventually reload the register thereby changing Nat and - * register content. + * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick + * up a stale entry later). The register base update MUST also be performed. */ /* --- linux-2.6.3-rc2/arch/ia64/lib/dec_and_lock.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/ia64/lib/dec_and_lock.c 2004-02-12 00:41:22.000000000 -0800 @@ -13,6 +13,7 @@ #include #include +#ifndef CONFIG_LOCKMETER /* * Decrement REFCOUNT and if the count reaches zero, acquire the spinlock. Both of these * operations have to be done atomically, so that the count doesn't drop to zero without @@ -40,3 +41,4 @@ atomic_dec_and_lock (atomic_t *refcount, } EXPORT_SYMBOL(atomic_dec_and_lock); +#endif --- linux-2.6.3-rc2/arch/ia64/lib/io.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ia64/lib/io.c 2004-02-12 00:39:25.000000000 -0800 @@ -9,13 +9,13 @@ * This needs to be optimized. */ void -__ia64_memcpy_fromio (void * to, unsigned long from, long count) +__ia64_memcpy_fromio (void *to, unsigned long from, long count) { + char *dst = to; + while (count) { count--; - *(char *) to = readb(from); - ((char *) to)++; - from++; + *dst++ = readb(from++); } } EXPORT_SYMBOL(__ia64_memcpy_fromio); @@ -25,13 +25,13 @@ EXPORT_SYMBOL(__ia64_memcpy_fromio); * This needs to be optimized. */ void -__ia64_memcpy_toio (unsigned long to, void * from, long count) +__ia64_memcpy_toio (unsigned long to, void *from, long count) { + char *src = from; + while (count) { count--; - writeb(*(char *) from, to); - ((char *) from)++; - to++; + writeb(*src++, to++); } } EXPORT_SYMBOL(__ia64_memcpy_toio); --- linux-2.6.3-rc2/arch/ia64/sn/io/sn2/ml_SN_init.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/ml_SN_init.c 2004-02-12 00:41:13.000000000 -0800 @@ -17,6 +17,7 @@ #include #include #include +#include int maxcpus; @@ -69,12 +70,15 @@ void init_platform_nodepda(nodepda_t *np } void -init_platform_hubinfo(nodepda_t **nodepdaindr) { +init_platform_hubinfo(nodepda_t **nodepdaindr) +{ cnodeid_t cnode; hubinfo_t hubinfo; nodepda_t *npda; extern int numionodes; + if (IS_RUNNING_ON_SIMULATOR()) + return; for (cnode = 0; cnode < numionodes; cnode++) { npda = nodepdaindr[cnode]; hubinfo = (hubinfo_t)npda->pdinfo; --- linux-2.6.3-rc2/arch/ia64/sn/io/sn2/ml_SN_intr.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/sn/io/sn2/ml_SN_intr.c 2004-02-12 00:41:13.000000000 -0800 @@ -30,6 +30,7 @@ #include #include #include +#include extern irqpda_t *irqpdaindr; extern cnodeid_t master_node_get(vertex_hdl_t vhdl); @@ -216,7 +217,6 @@ static cpuid_t intr_cpu_choose_from_node { cpuid_t cpu, best_cpu = CPU_NONE; int slice, min_count = 1000; - irqpda_t *irqs; for (slice = CPUS_PER_NODE - 1; slice >= 0; slice--) { int intrs; @@ -227,8 +227,7 @@ static cpuid_t intr_cpu_choose_from_node if (!cpu_online(cpu)) continue; - irqs = irqpdaindr; - intrs = irqs->num_irq_used; + intrs = pdacpu(cpu)->sn_num_irqs; if (min_count > intrs) { min_count = intrs; @@ -243,6 +242,7 @@ static cpuid_t intr_cpu_choose_from_node } } } + pdacpu(best_cpu)->sn_num_irqs++; return best_cpu; } --- linux-2.6.3-rc2/arch/ia64/sn/kernel/irq.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ia64/sn/kernel/irq.c 2004-02-12 00:40:58.000000000 -0800 @@ -121,7 +121,7 @@ sn_end_irq(unsigned int irq) static void sn_set_affinity_irq(unsigned int irq, unsigned long cpu) { -#if CONFIG_SMP +#ifdef CONFIG_SMP int redir = 0; struct sn_intr_list_t *p = sn_intr_list[irq]; pcibr_intr_t intr; --- linux-2.6.3-rc2/arch/ia64/sn/kernel/mca.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ia64/sn/kernel/mca.c 2004-02-12 00:39:25.000000000 -0800 @@ -68,20 +68,6 @@ print_hook(const char *fmt, ...) } - -/* - * ia64_sn2_platform_plat_specific_err_print - * - * Called by the MCA handler to log platform-specific errors. - */ -void -ia64_sn2_platform_plat_specific_err_print(int header_len, int sect_len, u8 *p_data, prfunc_t prfunc) -{ - ia64_sn_plat_specific_err_print(print_hook, p_data - sect_len); -} - - - static void sn_cpei_handler(int irq, void *devid, struct pt_regs *regs) { --- linux-2.6.3-rc2/arch/ia64/sn/kernel/sn2/sn2_smp.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ia64/sn/kernel/sn2/sn2_smp.c 2004-02-12 00:39:25.000000000 -0800 @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. */ #include @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include #include @@ -67,14 +69,56 @@ wait_piowc(void) * * Purges the translation caches of all processors of the given virtual address * range. + * + * Note: + * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. + * - cpu_vm_mask is converted into a nodemask of the nodes containing the + * cpus in cpu_vm_mask. + * - if only one bit is set in cpu_vm_mask & it is the current cpu, + * then only the local TLB needs to be flushed. This flushing can be done + * using ptc.l. This is the common case & avoids the global spinlock. + * - if multiple cpus have loaded the context, then flushing has to be + * done with ptc.g/MMRs under protection of the global ptc_lock. */ void sn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits) { - int cnode, mycnode, nasid, flushed=0; + int i, cnode, mynasid, cpu, lcpu=0, nasid, flushed=0; volatile unsigned long *ptc0, *ptc1; unsigned long flags=0, data0, data1; + struct mm_struct *mm=current->active_mm; + short nasids[NR_NODES], nix; + DECLARE_BITMAP(nodes_flushed, NR_NODES); + + CLEAR_BITMAP(nodes_flushed, NR_NODES); + + i = 0; + + for_each_cpu_mask(cpu, mm->cpu_vm_mask) { + cnode = cpu_to_node(cpu); + __set_bit(cnode, nodes_flushed); + lcpu = cpu; + i++; + } + + preempt_disable(); + + if (likely(i == 1 && lcpu == smp_processor_id())) { + do { + ia64_ptcl(start, nbits<<2); + start += (1UL << nbits); + } while (start < end); + ia64_srlz_i(); + preempt_enable(); + return; + } + + nix = 0; + for (cnode=find_first_bit(&nodes_flushed, NR_NODES); cnode < NR_NODES; + cnode=find_next_bit(&nodes_flushed, NR_NODES, ++cnode)) + nasids[nix++] = cnodeid_to_nasid(cnode); + data0 = (1UL< - in the kernel source. - - The watchdog is usually used together with the watchdog daemon - which is available from - . This daemon can - also monitor NFS connections and can reboot the machine when the process - table is full. - - If unsure, say N. - -config WATCHDOG_NOWAYOUT - bool "Disable watchdog shutdown on close" - depends on WATCHDOG - help - The default watchdog behaviour (which you get if you say N here) is - to stop the timer if the process managing it closes the file - /dev/watchdog. It's always remotely possible that this process might - get killed. If you say Y here, the watchdog cannot be stopped once - it has been started. - -config SOFT_WATCHDOG - bool "Software watchdog" - depends on WATCHDOG - help - A software monitoring watchdog. This will fail to reboot your system - from some situations that the hardware watchdog will recover - from. Equally it's a lot cheaper to install. - - To compile this driver as a module, choose M here: the - module will be called softdog. +source "drivers/char/watchdog/Kconfig" config GEN_RTC tristate "Generic /dev/rtc emulation" if !SUN3 @@ -1073,8 +1030,7 @@ config GEN_RTC precision in some cases. To compile this driver as a module, choose M here: the - module will be called genrtc. To load the module automatically - add 'alias char-major-10-135 genrtc' to your /etc/modules.conf + module will be called genrtc. config GEN_RTC_X bool "Extended RTC operation" --- linux-2.6.3-rc2/arch/m68k/kernel/signal.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/m68k/kernel/signal.c 2004-02-12 00:40:57.000000000 -0800 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include --- linux-2.6.3-rc2/arch/m68k/kernel/sys_m68k.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/m68k/kernel/sys_m68k.c 2004-02-12 00:40:55.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -156,8 +157,6 @@ out: } #endif -extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); - struct sel_arg_struct { unsigned long n; fd_set *inp, *outp, *exp; --- linux-2.6.3-rc2/arch/m68knommu/kernel/signal.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/m68knommu/kernel/signal.c 2004-02-12 00:40:57.000000000 -0800 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -50,8 +51,6 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -asmlinkage long sys_wait4(pid_t pid, unsigned int * stat_addr, int options, - struct rusage * ru); asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs); /* --- linux-2.6.3-rc2/arch/m68knommu/kernel/sys_m68k.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/m68knommu/kernel/sys_m68k.c 2004-02-12 00:40:55.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -111,8 +112,6 @@ out: return error; } -extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); - struct sel_arg_struct { unsigned long n; fd_set *inp, *outp, *exp; --- linux-2.6.3-rc2/arch/mips/kernel/ioctl32.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/mips/kernel/ioctl32.c 2004-02-12 00:40:52.000000000 -0800 @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -94,8 +95,6 @@ #include #endif -long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); - static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg) { mm_segment_t old_fs = get_fs(); --- linux-2.6.3-rc2/arch/mips/kernel/irixioctl.c 2003-07-02 14:53:13.000000000 -0700 +++ 25/arch/mips/kernel/irixioctl.c 2004-02-12 00:40:52.000000000 -0800 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -26,9 +27,6 @@ struct irix_termios { cc_t c_cc[NCCS]; }; -extern asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg); -extern asmlinkage int sys_write(unsigned int fd,char * buf,unsigned int count); extern void start_tty(struct tty_struct *tty); static struct tty_struct *get_tty(int fd) { --- linux-2.6.3-rc2/arch/mips/kernel/irq.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/mips/kernel/irq.c 2004-02-12 00:41:09.000000000 -0800 @@ -835,7 +835,7 @@ static cpumask_t irq_affinity [NR_IRQS] static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -873,7 +873,7 @@ static int irq_affinity_write_proc (stru static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3-rc2/arch/mips/kernel/linux32.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/mips/kernel/linux32.c 2004-02-12 00:40:55.000000000 -0800 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -123,8 +124,6 @@ out: } -asmlinkage long sys_truncate(const char * path, unsigned long length); - asmlinkage int sys_truncate64(const char *path, unsigned int high, unsigned int low) { @@ -133,8 +132,6 @@ asmlinkage int sys_truncate64(const char return sys_truncate(path, ((long) high << 32) | low); } -asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); - asmlinkage int sys_ftruncate64(unsigned int fd, unsigned int high, unsigned int low) { @@ -375,8 +372,6 @@ xlate_dirent(void *dirent64, void *diren return; } -asmlinkage long sys_getdents(unsigned int fd, void * dirent, unsigned int count); - asmlinkage long sys32_getdents(unsigned int fd, void * dirent32, unsigned int count) { @@ -497,8 +492,6 @@ struct sysinfo32 { char _f[8]; }; -extern asmlinkage int sys_sysinfo(struct sysinfo *info); - asmlinkage int sys32_sysinfo(struct sysinfo32 *info) { struct sysinfo s; @@ -633,10 +626,6 @@ sys32_settimeofday(struct compat_timeval return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } -extern asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, - unsigned long offset_low, loff_t * result, - unsigned int origin); - asmlinkage int sys32_llseek(unsigned int fd, unsigned int offset_high, unsigned int offset_low, loff_t * result, unsigned int origin) @@ -1018,10 +1007,6 @@ out_nofds: } - -extern asmlinkage int sys_sched_rr_get_interval(pid_t pid, - struct timespec *interval); - asmlinkage int sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec *interval) { @@ -1668,9 +1653,6 @@ asmlinkage long sys32_sysctl(struct sysc #endif /* CONFIG_SYSCTL */ -extern asmlinkage int sys_sched_setaffinity(pid_t pid, unsigned int len, - unsigned long *user_mask_ptr); - asmlinkage int sys32_sched_setaffinity(compat_pid_t pid, unsigned int len, u32 *user_mask_ptr) { @@ -1692,9 +1674,6 @@ asmlinkage int sys32_sched_setaffinity(c return ret; } -extern asmlinkage int sys_sched_getaffinity(pid_t pid, unsigned int len, - unsigned long *user_mask_ptr); - asmlinkage int sys32_sched_getaffinity(compat_pid_t pid, unsigned int len, u32 *user_mask_ptr) { @@ -1734,8 +1713,6 @@ asmlinkage long sys32_newuname(struct ne return ret; } -extern asmlinkage long sys_personality(unsigned long); - asmlinkage int sys32_personality(unsigned long personality) { int ret; @@ -1820,8 +1797,6 @@ asmlinkage int sys32_adjtimex(struct tim return ret; } -extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t count); - asmlinkage int sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, s32 count) { @@ -1842,8 +1817,6 @@ asmlinkage int sys32_sendfile(int out_fd return ret; } -asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count); - asmlinkage ssize_t sys32_readahead(int fd, u32 pad0, u64 a2, u64 a3, size_t count) { --- linux-2.6.3-rc2/arch/mips/kernel/signal32.c 2003-08-08 22:55:10.000000000 -0700 +++ 25/arch/mips/kernel/signal32.c 2004-02-12 00:40:54.000000000 -0800 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -761,9 +762,6 @@ out: return ret; } -asmlinkage long sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, - size_t sigsetsize); - asmlinkage int sys32_rt_sigprocmask(int how, compat_sigset_t *set, compat_sigset_t *oset, unsigned int sigsetsize) { @@ -785,8 +783,6 @@ asmlinkage int sys32_rt_sigprocmask(int return ret; } -asmlinkage long sys_rt_sigpending(sigset_t *set, size_t sigsetsize); - asmlinkage int sys32_rt_sigpending(compat_sigset_t *uset, unsigned int sigsetsize) { @@ -895,8 +891,6 @@ asmlinkage int sys32_rt_sigtimedwait(com return ret; } -extern asmlinkage int sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo); - asmlinkage int sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo) { siginfo_t info; --- linux-2.6.3-rc2/arch/mips/kernel/sysirix.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/mips/kernel/sysirix.c 2004-02-12 00:40:55.000000000 -0800 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -235,13 +236,6 @@ asmlinkage int irix_prctl(struct pt_regs #undef DEBUG_PROCGRPS extern unsigned long irix_mapelf(int fd, struct elf_phdr *user_phdrp, int cnt); -extern asmlinkage int sys_setpgid(pid_t pid, pid_t pgid); -extern void sys_sync(void); -extern asmlinkage int sys_getsid(pid_t pid); -extern asmlinkage long sys_write (unsigned int fd, const char *buf, unsigned long count); -extern asmlinkage long sys_lseek (unsigned int fd, off_t offset, unsigned int origin); -extern asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist); -extern asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist); extern int getrusage(struct task_struct *p, int who, struct rusage *ru); extern char *prom_getenv(char *name); extern long prom_setenv(char *name, char *value); @@ -368,7 +362,7 @@ asmlinkage int irix_syssgi(struct pt_reg retval = HZ; goto out; case 4: - retval = NGROUPS; + retval = NGROUPS_MAX; goto out; case 5: retval = NR_OPEN; @@ -694,9 +688,6 @@ asmlinkage int irix_pause(void) return -EINTR; } -extern asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, - unsigned long new_flags, void * data); - /* XXX need more than this... */ asmlinkage int irix_mount(char *dev_name, char *dir_name, unsigned long flags, char *type, void *data, int datalen) @@ -792,9 +783,6 @@ out: return error; } -extern asmlinkage int sys_setpgid(pid_t pid, pid_t pgid); -extern asmlinkage int sys_setsid(void); - asmlinkage int irix_setpgrp(int flags) { int error; @@ -883,8 +871,6 @@ asmlinkage unsigned long irix_sethostid( return -EINVAL; } -extern asmlinkage int sys_socket(int family, int type, int protocol); - asmlinkage int irix_socket(int family, int type, int protocol) { switch(type) { @@ -1356,8 +1342,6 @@ asmlinkage int irix_fxstat(int version, return error; } -extern asmlinkage int sys_mknod(const char * filename, int mode, unsigned dev); - asmlinkage int irix_xmknod(int ver, char *filename, int mode, unsigned dev) { int retval; @@ -1501,9 +1485,6 @@ asmlinkage int irix_sigqueue(int pid, in return -EINVAL; } -extern asmlinkage int sys_truncate(const char * path, unsigned long length); -extern asmlinkage int sys_ftruncate(unsigned int fd, unsigned long length); - asmlinkage int irix_truncate64(char *name, int pad, int size1, int size2) { int retval; @@ -1532,10 +1513,6 @@ out: return retval; } -extern asmlinkage unsigned long -sys_mmap(unsigned long addr, size_t len, int prot, int flags, int fd, - off_t offset); - asmlinkage int irix_mmap64(struct pt_regs *regs) { int len, prot, flags, fd, off1, off2, error, base = 0; @@ -2106,9 +2083,6 @@ out: #undef DEBUG_FCNTL -extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, - unsigned long arg); - #define IRIX_F_ALLOCSP 10 asmlinkage int irix_fcntl(int fd, int cmd, int arg) --- linux-2.6.3-rc2/arch/parisc/hpux/ioctl.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/parisc/hpux/ioctl.c 2004-02-12 00:40:52.000000000 -0800 @@ -35,13 +35,12 @@ #include #include +#include #include #include #include #include -int sys_ioctl(unsigned int, unsigned int, unsigned long); - static int hpux_ioctl_t(int fd, unsigned long cmd, unsigned long arg) { int result = -EOPNOTSUPP; --- linux-2.6.3-rc2/arch/parisc/hpux/sys_hpux.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/parisc/hpux/sys_hpux.c 2004-02-12 00:40:57.000000000 -0800 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -34,8 +35,6 @@ #include #include -unsigned long sys_brk(unsigned long addr); - unsigned long hpux_brk(unsigned long addr) { /* Sigh. Looks like HP/UX libc relies on kernel bugs. */ @@ -61,7 +60,6 @@ int hpux_ptrace(void) int hpux_wait(int *stat_loc) { - extern int sys_waitpid(int, int *, int); return sys_waitpid(-1, stat_loc, 0); } @@ -269,9 +267,6 @@ static int hpux_uname(struct hpux_utsnam return error; } -int sys_sethostname(char *, int); -int sys_gethostname(char *, int); - /* Note: HP-UX just uses the old suser() function to check perms * in this system call. We'll use capable(CAP_SYS_ADMIN). */ --- linux-2.6.3-rc2/arch/parisc/kernel/parisc_ksyms.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/parisc/kernel/parisc_ksyms.c 2004-02-12 00:40:55.000000000 -0800 @@ -27,6 +27,7 @@ #include #include #include +#include #include EXPORT_SYMBOL(memchr); @@ -83,10 +84,6 @@ EXPORT_SYMBOL(__memcpy_fromio); EXPORT_SYMBOL(__memset_io); #include -extern long sys_open(const char *, int, int); -extern off_t sys_lseek(int, off_t, int); -extern int sys_read(int, char *, int); -extern int sys_write(int, const char *, int); EXPORT_SYMBOL(sys_open); EXPORT_SYMBOL(sys_lseek); EXPORT_SYMBOL(sys_read); --- linux-2.6.3-rc2/arch/parisc/kernel/signal32.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/parisc/kernel/signal32.c 2004-02-12 00:40:54.000000000 -0800 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -92,9 +93,6 @@ get_sigset32(compat_sigset_t *up, sigset int sys32_rt_sigprocmask(int how, compat_sigset_t *set, compat_sigset_t *oset, unsigned int sigsetsize) { - extern long sys_rt_sigprocmask(int how, - sigset_t *set, sigset_t *oset, - size_t sigsetsize); sigset_t old_set, new_set; int ret; @@ -115,7 +113,6 @@ int sys32_rt_sigpending(compat_sigset_t { int ret; sigset_t set; - extern long sys_rt_sigpending(sigset_t *set, size_t sigsetsize); KERNEL_SYSCALL(ret, sys_rt_sigpending, &set, sigsetsize); --- linux-2.6.3-rc2/arch/parisc/kernel/sys_parisc32.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/parisc/kernel/sys_parisc32.c 2004-02-12 00:40:54.000000000 -0800 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -357,7 +358,6 @@ asmlinkage long sys32_sched_rr_get_inter { struct timespec t; int ret; - extern asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval); KERNEL_SYSCALL(ret, sys_sched_rr_get_interval, pid, &t); if (put_compat_timespec(&t, interval)) @@ -1089,7 +1089,6 @@ asmlinkage long sys32_msgrcv(int msqid, } -extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t count); asmlinkage int sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, s32 count) { mm_segment_t old_fs = get_fs(); @@ -1197,7 +1196,6 @@ asmlinkage int sys32_nfsservctl(int cmd, return ret; } -extern asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t *offset, size_t count); typedef long __kernel_loff_t32; /* move this to asm/posix_types.h? */ asmlinkage int sys32_sendfile64(int out_fd, int in_fd, __kernel_loff_t32 *offset, s32 count) @@ -1345,8 +1343,6 @@ asmlinkage int sys32_sysinfo(struct sysi * half of the argument has been zeroed by syscall.S. */ -extern asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin); - asmlinkage int sys32_lseek(unsigned int fd, int offset, unsigned int origin) { return sys_lseek(fd, offset, origin); @@ -1367,8 +1363,6 @@ asmlinkage long sys32_semctl(int semid, return sys_semctl (semid, semnum, cmd, arg); } -extern long sys_lookup_dcookie(u64 cookie64, char *buf, size_t len); - long sys32_lookup_dcookie(u32 cookie_high, u32 cookie_low, char *buf, size_t len) { --- linux-2.6.3-rc2/arch/parisc/kernel/sys_parisc.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/parisc/kernel/sys_parisc.c 2004-02-12 00:40:52.000000000 -0800 @@ -30,6 +30,7 @@ #include #include #include +#include int sys_pipe(int *fildes) { @@ -182,10 +183,6 @@ long sys_shmat_wrapper(int shmid, char * /* Fucking broken ABI */ #ifdef CONFIG_PARISC64 -extern asmlinkage long sys_truncate(const char *, unsigned long); -extern asmlinkage long sys_ftruncate(unsigned int, unsigned long); -extern asmlinkage long sys_fcntl(unsigned int, unsigned int, unsigned long); - asmlinkage long parisc_truncate64(const char * path, unsigned int high, unsigned int low) { @@ -214,9 +211,6 @@ asmlinkage long sys_fcntl64(unsigned int } #else -extern asmlinkage long sys_truncate64(const char *, loff_t); -extern asmlinkage long sys_ftruncate64(unsigned int, loff_t); - asmlinkage long parisc_truncate64(const char * path, unsigned int high, unsigned int low) { @@ -230,12 +224,6 @@ asmlinkage long parisc_ftruncate64(unsig } #endif -extern asmlinkage ssize_t sys_pread64(unsigned int fd, char *buf, - size_t count, loff_t pos); -extern asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char *buf, - size_t count, loff_t pos); -extern asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count); - asmlinkage ssize_t parisc_pread64(unsigned int fd, char *buf, size_t count, unsigned int high, unsigned int low) { --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/configs/g5_defconfig 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,1078 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_64BIT=y +CONFIG_MMU=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_GENERIC_ISA_DMA=y +CONFIG_HAVE_DEC_LOCK=y +CONFIG_EARLY_PRINTK=y +CONFIG_COMPAT=y +CONFIG_FRAME_POINTER=y +CONFIG_FORCE_MAX_ZONEORDER=13 + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +CONFIG_STANDALONE=y + +# +# General setup +# +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +CONFIG_LOG_BUF_SHIFT=17 +# CONFIG_IKCONFIG is not set +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_OBSOLETE_MODPARM=y +# CONFIG_MODVERSIONS is not set +# CONFIG_KMOD is not set + +# +# Platform support +# +# CONFIG_PPC_ISERIES is not set +CONFIG_PPC_PSERIES=y +CONFIG_PPC=y +CONFIG_PPC64=y +CONFIG_PPC_OF=y +CONFIG_ALTIVEC=y +CONFIG_PPC_PMAC=y +CONFIG_PPC_PMAC64=y +CONFIG_BOOTX_TEXT=y +CONFIG_POWER4_ONLY=y +CONFIG_SMP=y +CONFIG_IRQ_ALL_CPUS=y +CONFIG_NR_CPUS=2 +# CONFIG_HMT is not set +CONFIG_DISCONTIGMEM=y +# CONFIG_NUMA is not set +# CONFIG_PPC_RTAS is not set +# CONFIG_LPARCFG is not set + +# +# General setup +# +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_PCI_LEGACY_PROC=y +CONFIG_PCI_NAMES=y +CONFIG_HOTPLUG=y + +# +# PCMCIA/CardBus support +# +# CONFIG_PCMCIA is not set + +# +# PCI Hotplug Support +# +# CONFIG_HOTPLUG_PCI is not set +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_FW_LOADER=y + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_BLK_DEV_INITRD=y + +# +# ATA/ATAPI/MFM/RLL support +# +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +CONFIG_BLK_DEV_IDEDISK=y +# CONFIG_IDEDISK_MULTI_MODE is not set +# CONFIG_IDEDISK_STROKE is not set +CONFIG_BLK_DEV_IDECD=y +CONFIG_BLK_DEV_IDETAPE=y +CONFIG_BLK_DEV_IDEFLOPPY=y +# CONFIG_BLK_DEV_IDESCSI is not set +# CONFIG_IDE_TASK_IOCTL is not set +# CONFIG_IDE_TASKFILE_IO is not set + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_IDEPCI_SHARE_IRQ is not set +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_GENERIC is not set +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_SL82C105 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +CONFIG_BLK_DEV_ADMA=y +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +# CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_SC1200 is not set +# CONFIG_BLK_DEV_PIIX is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +CONFIG_BLK_DEV_IDE_PMAC=y +CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y +CONFIG_BLK_DEV_IDEDMA_PMAC=y +# CONFIG_BLK_DEV_IDE_PMAC_BLINK is not set +CONFIG_BLK_DEV_IDEDMA_PMAC_AUTO=y +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_DMA_NONPCI is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +CONFIG_SCSI=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=y +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_REPORT_LUNS=y +CONFIG_SCSI_CONSTANTS=y +# CONFIG_SCSI_LOGGING is not set + +# +# SCSI low-level drivers +# +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_MEGARAID is not set +CONFIG_SCSI_SATA=y +CONFIG_SCSI_SATA_SVW=y +# CONFIG_SCSI_ATA_PIIX is not set +# CONFIG_SCSI_SATA_PROMISE is not set +# CONFIG_SCSI_SATA_VIA is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_CPQFCTS is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_EATA_PIO is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INIA100 is not set +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +# CONFIG_SCSI_QLOGIC_ISP is not set +# CONFIG_SCSI_QLOGIC_FC is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +CONFIG_SCSI_QLA2XXX=y +# CONFIG_SCSI_QLA21XX is not set +# CONFIG_SCSI_QLA22XX is not set +# CONFIG_SCSI_QLA2300 is not set +# CONFIG_SCSI_QLA2322 is not set +# CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_QLA6322 is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_MESH is not set +# CONFIG_SCSI_MAC53C94 is not set + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=y +CONFIG_MD_RAID0=y +CONFIG_MD_RAID1=y +CONFIG_MD_RAID5=y +# CONFIG_MD_RAID6 is not set +# CONFIG_MD_MULTIPATH is not set +CONFIG_BLK_DEV_DM=y +CONFIG_DM_IOCTL_V4=y + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support (EXPERIMENTAL) +# +CONFIG_IEEE1394=m + +# +# Subsystem Options +# +# CONFIG_IEEE1394_VERBOSEDEBUG is not set +CONFIG_IEEE1394_OUI_DB=y + +# +# Device Drivers +# +# CONFIG_IEEE1394_PCILYNX is not set +CONFIG_IEEE1394_OHCI1394=m + +# +# Protocol Drivers +# +CONFIG_IEEE1394_VIDEO1394=m +CONFIG_IEEE1394_SBP2=m +# CONFIG_IEEE1394_SBP2_PHYS_DMA is not set +CONFIG_IEEE1394_ETH1394=m +CONFIG_IEEE1394_DV1394=m +CONFIG_IEEE1394_RAWIO=m +# CONFIG_IEEE1394_CMP is not set + +# +# I2O device support +# + +# +# Macintosh device drivers +# +CONFIG_ADB_PMU=y +# CONFIG_PMAC_PBOOK is not set +# CONFIG_PMAC_BACKLIGHT is not set +# CONFIG_MAC_SERIAL is not set +CONFIG_ADB=y +# CONFIG_INPUT_ADBHID is not set +CONFIG_THERM_PM72=y + +# +# Networking support +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +# CONFIG_NETLINK_DEV is not set +CONFIG_UNIX=y +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=y +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +CONFIG_INET_ECN=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +# CONFIG_IPV6 is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set +# CONFIG_NETFILTER is not set +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +CONFIG_IPV6_SCTP__=y +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_VLAN_8021Q is not set +CONFIG_LLC=y +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set +CONFIG_DUMMY=m +CONFIG_BONDING=m +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_MACE is not set +# CONFIG_BMAC is not set +# CONFIG_OAKNET is not set +# CONFIG_HAPPYMEAL is not set +CONFIG_SUNGEM=y +# CONFIG_NET_VENDOR_3COM is not set + +# +# Tulip family network device support +# +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +# CONFIG_NET_PCI is not set + +# +# Ethernet (1000 Mbit) +# +CONFIG_ACENIC=y +CONFIG_ACENIC_OMIT_TIGON_I=y +# CONFIG_DL2K is not set +CONFIG_E1000=y +# CONFIG_E1000_NAPI is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SK98LIN is not set +CONFIG_TIGON3=m + +# +# Ethernet (10000 Mbit) +# +# CONFIG_IXGB is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PPP=m +# CONFIG_PPP_MULTILINK is not set +# CONFIG_PPP_FILTER is not set +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPPOE=m +# CONFIG_SLIP is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Token Ring devices +# +CONFIG_TR=y +CONFIG_IBMOL=y +# CONFIG_IBMLS is not set +# CONFIG_3C359 is not set +# CONFIG_TMS380TR is not set +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set + +# +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# IrDA (infrared) support +# +# CONFIG_IRDA is not set + +# +# Bluetooth support +# +# CONFIG_BT is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN_BOOL is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +CONFIG_INPUT_JOYDEV=m +# CONFIG_INPUT_TSDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set + +# +# Input I/O drivers +# +# CONFIG_GAMEPORT is not set +CONFIG_SOUND_GAMEPORT=y +CONFIG_SERIO=y +# CONFIG_SERIO_I8042 is not set +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +# CONFIG_KEYBOARD_ATKBD is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_INPUT_MOUSE=y +# CONFIG_MOUSE_PS2 is not set +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_SERIAL_PMACZILOG=y +CONFIG_SERIAL_PMACZILOG_CONSOLE=y +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=256 +CONFIG_HVC_CONSOLE=y + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +# CONFIG_QIC02_TAPE is not set + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_AGP is not set +# CONFIG_DRM is not set +CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=256 + +# +# I2C support +# +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y + +# +# I2C Algorithms +# +CONFIG_I2C_ALGOBIT=y +# CONFIG_I2C_ALGOPCF is not set + +# +# I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_ELV is not set +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_ISA is not set +CONFIG_I2C_KEYWEST=y +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_SCx200_ACB is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_VELLEMAN is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set + +# +# I2C Hardware Sensors Chip support +# +# CONFIG_I2C_SENSOR is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +CONFIG_FB=y +# CONFIG_FB_CYBER2000 is not set +CONFIG_FB_OF=y +# CONFIG_FB_CONTROL is not set +# CONFIG_FB_PLATINUM is not set +# CONFIG_FB_VALKYRIE is not set +# CONFIG_FB_CT65550 is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_S3TRIO is not set +# CONFIG_FB_VGA16 is not set +CONFIG_FB_RIVA=y +# CONFIG_FB_MATROX is not set +CONFIG_FB_RADEON=y +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_VIRTUAL is not set + +# +# Console display driver support +# +# CONFIG_VGA_CONSOLE is not set +# CONFIG_MDA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_PCI_CONSOLE=y +# CONFIG_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y + +# +# Logo configuration +# +CONFIG_LOGO=y +CONFIG_LOGO_LINUX_MONO=y +CONFIG_LOGO_LINUX_VGA16=y +CONFIG_LOGO_LINUX_CLUT224=y + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_BANDWIDTH is not set +# CONFIG_USB_DYNAMIC_MINORS is not set + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_UHCI_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_BLUETOOTH_TTY is not set +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m +CONFIG_USB_STORAGE=m +CONFIG_USB_STORAGE_DEBUG=y +CONFIG_USB_STORAGE_DATAFAB=y +CONFIG_USB_STORAGE_FREECOM=y +CONFIG_USB_STORAGE_ISD200=y +CONFIG_USB_STORAGE_DPCM=y +CONFIG_USB_STORAGE_HP8200e=y +CONFIG_USB_STORAGE_SDDR09=y +CONFIG_USB_STORAGE_SDDR55=y +CONFIG_USB_STORAGE_JUMPSHOT=y + +# +# USB Human Interface Devices (HID) +# +CONFIG_USB_HID=y +CONFIG_USB_HIDINPUT=y +CONFIG_HID_FF=y +CONFIG_HID_PID=y +CONFIG_LOGITECH_FF=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_USB_HIDDEV=y +# CONFIG_USB_AIPTEK is not set +# CONFIG_USB_WACOM is not set +# CONFIG_USB_KBTAB is not set +# CONFIG_USB_POWERMATE is not set +# CONFIG_USB_XPAD is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set +# CONFIG_USB_HPUSBSCSI is not set + +# +# USB Multimedia devices +# +# CONFIG_USB_DABUSB is not set + +# +# Video4Linux support is needed for USB Multimedia device support +# + +# +# USB Network adaptors +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +CONFIG_USB_USBNET=m + +# +# USB Host-to-Host Cables +# +CONFIG_USB_AN2720=y +CONFIG_USB_BELKIN=y +CONFIG_USB_GENESYS=y +CONFIG_USB_NET1080=y +CONFIG_USB_PL2301=y + +# +# Intelligent USB Devices/Gadgets +# +CONFIG_USB_ARMLINUX=y +CONFIG_USB_EPSON2888=y +CONFIG_USB_ZAURUS=y +CONFIG_USB_CDCETHER=y + +# +# USB Network Adapters +# +CONFIG_USB_AX8817X=y + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +CONFIG_USB_SERIAL=m +CONFIG_USB_SERIAL_GENERIC=y +CONFIG_USB_SERIAL_BELKIN=m +CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m +CONFIG_USB_SERIAL_EMPEG=m +CONFIG_USB_SERIAL_FTDI_SIO=m +CONFIG_USB_SERIAL_VISOR=m +CONFIG_USB_SERIAL_IPAQ=m +CONFIG_USB_SERIAL_IR=m +CONFIG_USB_SERIAL_EDGEPORT=m +CONFIG_USB_SERIAL_EDGEPORT_TI=m +CONFIG_USB_SERIAL_KEYSPAN_PDA=m +CONFIG_USB_SERIAL_KEYSPAN=m +CONFIG_USB_SERIAL_KEYSPAN_MPR=y +CONFIG_USB_SERIAL_KEYSPAN_USA28=y +CONFIG_USB_SERIAL_KEYSPAN_USA28X=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y +CONFIG_USB_SERIAL_KEYSPAN_USA19=y +CONFIG_USB_SERIAL_KEYSPAN_USA18X=y +CONFIG_USB_SERIAL_KEYSPAN_USA19W=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y +CONFIG_USB_SERIAL_KEYSPAN_USA49W=y +CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y +CONFIG_USB_SERIAL_KLSI=m +CONFIG_USB_SERIAL_KOBIL_SCT=m +CONFIG_USB_SERIAL_MCT_U232=m +CONFIG_USB_SERIAL_PL2303=m +CONFIG_USB_SERIAL_SAFE=m +CONFIG_USB_SERIAL_SAFE_PADDED=y +CONFIG_USB_SERIAL_CYBERJACK=m +CONFIG_USB_SERIAL_XIRCOM=m +CONFIG_USB_SERIAL_OMNINET=m +CONFIG_USB_EZUSB=y + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_TIGL is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_BRLVGER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_TEST is not set + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +# CONFIG_EXT2_FS_SECURITY is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +# CONFIG_XFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_QUOTA is not set +CONFIG_AUTOFS_FS=m +# CONFIG_AUTOFS4_FS is not set + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +CONFIG_UDF_FS=m + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +# CONFIG_DEVFS_FS is not set +CONFIG_DEVPTS_FS=y +CONFIG_DEVPTS_FS_XATTR=y +# CONFIG_DEVPTS_FS_SECURITY is not set +CONFIG_TMPFS=y +# CONFIG_HUGETLBFS is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V4=y +# CONFIG_NFS_DIRECTIO is not set +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_TCP=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=y +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=m +CONFIG_RPCSEC_GSS_KRB5=m +# CONFIG_SMB_FS is not set +CONFIG_CIFS=m +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y +# CONFIG_MSDOS_PARTITION is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_NEC98_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_EFI_PARTITION is not set + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set + +# +# Profiling support +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=y + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SLAB is not set +CONFIG_MAGIC_SYSRQ=y +# CONFIG_XMON is not set +# CONFIG_PPCDBG is not set +# CONFIG_DEBUG_INFO is not set + +# +# Security options +# +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=m +CONFIG_CRYPTO_SHA1=m +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_AES=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DEFLATE=m +CONFIG_CRYPTO_TEST=m + +# +# Library routines +# +CONFIG_CRC32=y +CONFIG_ZLIB_INFLATE=m +CONFIG_ZLIB_DEFLATE=m --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/configs/pSeries_defconfig 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,783 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_64BIT=y +CONFIG_MMU=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_GENERIC_ISA_DMA=y +CONFIG_HAVE_DEC_LOCK=y +CONFIG_EARLY_PRINTK=y +CONFIG_COMPAT=y +CONFIG_FRAME_POINTER=y +CONFIG_FORCE_MAX_ZONEORDER=13 + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +CONFIG_STANDALONE=y + +# +# General setup +# +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +CONFIG_OBSOLETE_MODPARM=y +# CONFIG_MODVERSIONS is not set +# CONFIG_KMOD is not set + +# +# Platform support +# +# CONFIG_PPC_ISERIES is not set +CONFIG_PPC_PSERIES=y +CONFIG_PPC=y +CONFIG_PPC64=y +CONFIG_PPC_OF=y +CONFIG_ALTIVEC=y +# CONFIG_PPC_PMAC is not set +# CONFIG_BOOTX_TEXT is not set +# CONFIG_POWER4_ONLY is not set +CONFIG_SMP=y +CONFIG_IRQ_ALL_CPUS=y +CONFIG_NR_CPUS=32 +# CONFIG_HMT is not set +# CONFIG_DISCONTIGMEM is not set +CONFIG_PPC_RTAS=y +CONFIG_RTAS_FLASH=m +CONFIG_SCANLOG=m +CONFIG_LPARCFG=y + +# +# General setup +# +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_PCI_LEGACY_PROC=y +CONFIG_PCI_NAMES=y +# CONFIG_HOTPLUG is not set +CONFIG_PROC_DEVICETREE=y +# CONFIG_CMDLINE_BOOL is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# + +# +# Block devices +# +CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y + +# +# ATA/ATAPI/MFM/RLL support +# +# CONFIG_IDE is not set + +# +# SCSI device support +# +CONFIG_SCSI=y +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=y +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_REPORT_LUNS=y +CONFIG_SCSI_CONSTANTS=y +# CONFIG_SCSI_LOGGING is not set + +# +# SCSI low-level drivers +# +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_MEGARAID is not set +# CONFIG_SCSI_SATA is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_CPQFCTS is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_EATA_PIO is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INIA100 is not set +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +# CONFIG_SCSI_QLOGIC_ISP is not set +# CONFIG_SCSI_QLOGIC_FC is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +CONFIG_SCSI_QLA2XXX=y +CONFIG_SCSI_QLA21XX=y +CONFIG_SCSI_QLA22XX=y +# CONFIG_SCSI_QLA2300 is not set +# CONFIG_SCSI_QLA2322 is not set +# CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_QLA6322 is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_DEBUG is not set + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=y +CONFIG_MD_RAID0=y +CONFIG_MD_RAID1=y +CONFIG_MD_RAID5=y +# CONFIG_MD_RAID6 is not set +# CONFIG_MD_MULTIPATH is not set +CONFIG_BLK_DEV_DM=y +CONFIG_DM_IOCTL_V4=y + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support (EXPERIMENTAL) +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# + +# +# Macintosh device drivers +# + +# +# Networking support +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +# CONFIG_NETLINK_DEV is not set +CONFIG_UNIX=y +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=y +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +CONFIG_INET_ECN=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +# CONFIG_IPV6 is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set +# CONFIG_NETFILTER is not set +CONFIG_XFRM=y +CONFIG_XFRM_USER=m + +# +# SCTP Configuration (EXPERIMENTAL) +# +CONFIG_IPV6_SCTP__=y +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set +CONFIG_DUMMY=m +CONFIG_BONDING=m +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_OAKNET is not set +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +CONFIG_NET_VENDOR_3COM=y +CONFIG_VORTEX=y +# CONFIG_TYPHOON is not set + +# +# Tulip family network device support +# +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +CONFIG_NET_PCI=y +CONFIG_PCNET32=y +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +CONFIG_E100=y +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_VIA_RHINE is not set + +# +# Ethernet (1000 Mbit) +# +CONFIG_ACENIC=y +CONFIG_ACENIC_OMIT_TIGON_I=y +# CONFIG_DL2K is not set +CONFIG_E1000=y +# CONFIG_E1000_NAPI is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SK98LIN is not set +# CONFIG_TIGON3 is not set + +# +# Ethernet (10000 Mbit) +# +# CONFIG_IXGB is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PPP=m +# CONFIG_PPP_MULTILINK is not set +# CONFIG_PPP_FILTER is not set +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPPOE=m +# CONFIG_SLIP is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set + +# +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# IrDA (infrared) support +# +# CONFIG_IRDA is not set + +# +# Bluetooth support +# +# CONFIG_BT is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN_BOOL is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input I/O drivers +# +# CONFIG_GAMEPORT is not set +CONFIG_SOUND_GAMEPORT=y +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +CONFIG_INPUT_MISC=y +CONFIG_INPUT_PCSPKR=y +# CONFIG_INPUT_UINPUT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_PMACZILOG is not set +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=256 +CONFIG_HVC_CONSOLE=y + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +# CONFIG_QIC02_TAPE is not set + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_AGP is not set +# CONFIG_DRM is not set +CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=256 + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +CONFIG_FB=y +# CONFIG_FB_CYBER2000 is not set +CONFIG_FB_OF=y +# CONFIG_FB_CT65550 is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_S3TRIO is not set +# CONFIG_FB_VGA16 is not set +# CONFIG_FB_RIVA is not set +CONFIG_FB_MATROX=y +CONFIG_FB_MATROX_MILLENIUM=y +CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB_MATROX_G450=y +CONFIG_FB_MATROX_G100=y +CONFIG_FB_MATROX_MULTIHEAD=y +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_VIRTUAL is not set + +# +# Console display driver support +# +# CONFIG_VGA_CONSOLE is not set +# CONFIG_MDA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_PCI_CONSOLE=y +# CONFIG_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y + +# +# Logo configuration +# +CONFIG_LOGO=y +CONFIG_LOGO_LINUX_MONO=y +CONFIG_LOGO_LINUX_VGA16=y +CONFIG_LOGO_LINUX_CLUT224=y + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +# CONFIG_USB is not set + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +# CONFIG_EXT2_FS_SECURITY is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_JFS_FS=y +CONFIG_JFS_POSIX_ACL=y +# CONFIG_JFS_DEBUG is not set +# CONFIG_JFS_STATISTICS is not set +CONFIG_FS_POSIX_ACL=y +CONFIG_XFS_FS=m +# CONFIG_XFS_RT is not set +# CONFIG_XFS_QUOTA is not set +# CONFIG_XFS_SECURITY is not set +CONFIG_XFS_POSIX_ACL=y +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_QUOTA is not set +CONFIG_AUTOFS_FS=m +# CONFIG_AUTOFS4_FS is not set + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +CONFIG_UDF_FS=m + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +# CONFIG_DEVFS_FS is not set +CONFIG_DEVPTS_FS=y +CONFIG_DEVPTS_FS_XATTR=y +# CONFIG_DEVPTS_FS_SECURITY is not set +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +CONFIG_CRAMFS=y +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V4=y +# CONFIG_NFS_DIRECTIO is not set +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_TCP=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=y +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=m +CONFIG_RPCSEC_GSS_KRB5=m +# CONFIG_SMB_FS is not set +CONFIG_CIFS=m +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set + +# +# Profiling support +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=y + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SLAB is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_XMON=y +CONFIG_XMON_DEFAULT=y +# CONFIG_PPCDBG is not set +# CONFIG_DEBUG_INFO is not set + +# +# Security options +# +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=m +CONFIG_CRYPTO_SHA1=m +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_AES=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DEFLATE=m +CONFIG_CRYPTO_TEST=m + +# +# Library routines +# +CONFIG_CRC32=y +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m --- linux-2.6.3-rc2/arch/ppc64/Kconfig 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc64/Kconfig 2004-02-12 00:39:53.000000000 -0800 @@ -60,7 +60,7 @@ config PPC_ISERIES bool "iSeries" config PPC_PSERIES - bool "pSeries" + bool "pSeries / PowerMac G5" endchoice @@ -72,6 +72,11 @@ config PPC64 bool default y +config PPC_OF + depends on PPC_PSERIES + bool + default y + # VMX is pSeries only for now until somebody writes the iSeries # exception vectors for it config ALTIVEC @@ -79,6 +84,22 @@ config ALTIVEC depends on PPC_PSERIES default y +config PPC_PMAC + depends on PPC_PSERIES + bool "Apple PowerMac G5 support" + +config PPC_PMAC64 + bool + depends on PPC_PMAC + default y + +config BOOTX_TEXT + bool "Support for early boot text console" + depends PPC_OF + help + Say Y here to see progress messages from the boot firmware in text + mode. Requires an Open Firmware compatible video card. + config POWER4_ONLY bool "Optimize for POWER4" default n @@ -357,6 +378,13 @@ config DEBUG_INFO debugging info resulting in a larger kernel image. Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. + +config DEBUG_SPINLOCK_SLEEP + bool "Sleep-inside-spinlock checking" + depends on DEBUG_KERNEL + help + If you say Y here, various routines which may sleep will become very + noisy if they are called with a spinlock held. endmenu --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/btext.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,748 @@ +/* + * Procedures for drawing on the screen early on in the boot process. + * + * Benjamin Herrenschmidt + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef NO_SCROLL + +#ifndef NO_SCROLL +static void scrollscreen(void); +#endif + +static void draw_byte(unsigned char c, long locX, long locY); +static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb); +static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb); +static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb); + +static int g_loc_X; +static int g_loc_Y; +static int g_max_loc_X; +static int g_max_loc_Y; + +unsigned long disp_BAT[2] __initdata = {0, 0}; + +#define cmapsz (16*256) + +static unsigned char vga_font[cmapsz]; + +int boot_text_mapped; +int force_printk_to_btext = 0; + +boot_infos_t disp_bi; + +extern char *klimit; + +/* This function will enable the early boot text when doing OF booting. This + * way, xmon output should work too + */ +void __init btext_setup_display(int width, int height, int depth, int pitch, + unsigned long address) +{ + unsigned long offset = reloc_offset(); + boot_infos_t* bi = PTRRELOC(&disp_bi); + + RELOC(g_loc_X) = 0; + RELOC(g_loc_Y) = 0; + RELOC(g_max_loc_X) = width / 8; + RELOC(g_max_loc_Y) = height / 16; + bi->logicalDisplayBase = (unsigned char *)address; + bi->dispDeviceBase = (unsigned char *)address; + bi->dispDeviceRowBytes = pitch; + bi->dispDeviceDepth = depth; + bi->dispDeviceRect[0] = bi->dispDeviceRect[1] = 0; + bi->dispDeviceRect[2] = width; + bi->dispDeviceRect[3] = height; + RELOC(boot_text_mapped) = 1; +} + +/* Here's a small text engine to use during early boot + * or for debugging purposes + * + * todo: + * + * - build some kind of vgacon with it to enable early printk + * - move to a separate file + * - add a few video driver hooks to keep in sync with display + * changes. + */ + +void map_boot_text(void) +{ + unsigned long base, offset, size; + boot_infos_t *bi = &disp_bi; + unsigned char *vbase; + + /* By default, we are no longer mapped */ + boot_text_mapped = 0; + if (bi->dispDeviceBase == 0) + return; + base = ((unsigned long) bi->dispDeviceBase) & 0xFFFFF000UL; + offset = ((unsigned long) bi->dispDeviceBase) - base; + size = bi->dispDeviceRowBytes * bi->dispDeviceRect[3] + offset + + bi->dispDeviceRect[0]; + vbase = __ioremap(base, size, _PAGE_NO_CACHE); + if (vbase == 0) + return; + bi->logicalDisplayBase = vbase + offset; + boot_text_mapped = 1; +} + +/* Calc the base address of a given point (x,y) */ +static unsigned char * calc_base(boot_infos_t *bi, int x, int y) +{ + unsigned char *base; + + base = bi->logicalDisplayBase; + if (base == 0) + base = bi->dispDeviceBase; + base += (x + bi->dispDeviceRect[0]) * (bi->dispDeviceDepth >> 3); + base += (y + bi->dispDeviceRect[1]) * bi->dispDeviceRowBytes; + return base; +} + +/* Adjust the display to a new resolution */ +void btext_update_display(unsigned long phys, int width, int height, + int depth, int pitch) +{ + boot_infos_t *bi = &disp_bi; + + if (bi->dispDeviceBase == 0) + return; + + /* check it's the same frame buffer (within 256MB) */ + if ((phys ^ (unsigned long)bi->dispDeviceBase) & 0xf0000000) + return; + + bi->dispDeviceBase = (__u8 *) phys; + bi->dispDeviceRect[0] = 0; + bi->dispDeviceRect[1] = 0; + bi->dispDeviceRect[2] = width; + bi->dispDeviceRect[3] = height; + bi->dispDeviceDepth = depth; + bi->dispDeviceRowBytes = pitch; + if (boot_text_mapped) { + iounmap(bi->logicalDisplayBase); + boot_text_mapped = 0; + } + map_boot_text(); + g_loc_X = 0; + g_loc_Y = 0; + g_max_loc_X = width / 8; + g_max_loc_Y = height / 16; +} + +void btext_clearscreen(void) +{ + unsigned long offset = reloc_offset(); + boot_infos_t* bi = PTRRELOC(&disp_bi); + unsigned long *base = (unsigned long *)calc_base(bi, 0, 0); + unsigned long width = ((bi->dispDeviceRect[2] - bi->dispDeviceRect[0]) * + (bi->dispDeviceDepth >> 3)) >> 3; + int i,j; + + for (i=0; i<(bi->dispDeviceRect[3] - bi->dispDeviceRect[1]); i++) + { + unsigned long *ptr = base; + for(j=width; j; --j) + *(ptr++) = 0; + base += (bi->dispDeviceRowBytes >> 3); + } +} + +#ifndef NO_SCROLL +static void scrollscreen(void) +{ + unsigned long offset = reloc_offset(); + boot_infos_t* bi = PTRRELOC(&disp_bi); + unsigned long *src = (unsigned long *)calc_base(bi,0,16); + unsigned long *dst = (unsigned long *)calc_base(bi,0,0); + unsigned long width = ((bi->dispDeviceRect[2] - bi->dispDeviceRect[0]) * + (bi->dispDeviceDepth >> 3)) >> 3; + int i,j; + + for (i=0; i<(bi->dispDeviceRect[3] - bi->dispDeviceRect[1] - 16); i++) + { + unsigned long *src_ptr = src; + unsigned long *dst_ptr = dst; + for(j=width; j; --j) + *(dst_ptr++) = *(src_ptr++); + src += (bi->dispDeviceRowBytes >> 3); + dst += (bi->dispDeviceRowBytes >> 3); + } + for (i=0; i<16; i++) + { + unsigned long *dst_ptr = dst; + for(j=width; j; --j) + *(dst_ptr++) = 0; + dst += (bi->dispDeviceRowBytes >> 3); + } +} +#endif /* ndef NO_SCROLL */ + +void btext_drawchar(char c) +{ + unsigned long offset = reloc_offset(); + int cline = 0; +#ifdef NO_SCROLL + int x; +#endif + if (!RELOC(boot_text_mapped)) + return; + + switch (c) { + case '\b': + if (RELOC(g_loc_X) > 0) + --RELOC(g_loc_X); + break; + case '\t': + RELOC(g_loc_X) = (RELOC(g_loc_X) & -8) + 8; + break; + case '\r': + RELOC(g_loc_X) = 0; + break; + case '\n': + RELOC(g_loc_X) = 0; + RELOC(g_loc_Y)++; + cline = 1; + break; + default: + draw_byte(c, RELOC(g_loc_X)++, RELOC(g_loc_Y)); + } + if (RELOC(g_loc_X) >= RELOC(g_max_loc_X)) { + RELOC(g_loc_X) = 0; + RELOC(g_loc_Y)++; + cline = 1; + } +#ifndef NO_SCROLL + while (RELOC(g_loc_Y) >= RELOC(g_max_loc_Y)) { + scrollscreen(); + RELOC(g_loc_Y)--; + } +#else + /* wrap around from bottom to top of screen so we don't + waste time scrolling each line. -- paulus. */ + if (RELOC(g_loc_Y) >= RELOC(g_max_loc_Y)) + RELOC(g_loc_Y) = 0; + if (cline) { + for (x = 0; x < RELOC(g_max_loc_X); ++x) + draw_byte(' ', x, RELOC(g_loc_Y)); + } +#endif +} + +void btext_drawstring(const char *c) +{ + unsigned long offset = reloc_offset(); + + if (!RELOC(boot_text_mapped)) + return; + while (*c) + btext_drawchar(*c++); +} + +void btext_drawhex(unsigned long v) +{ + unsigned long offset = reloc_offset(); + char *hex_table = RELOC("0123456789abcdef"); + + if (!RELOC(boot_text_mapped)) + return; + btext_drawchar(hex_table[(v >> 60) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 56) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 52) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 48) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 44) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 40) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 36) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 32) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 28) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 24) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 20) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 16) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 12) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 8) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 4) & 0x0000000FUL]); + btext_drawchar(hex_table[(v >> 0) & 0x0000000FUL]); + btext_drawchar(' '); +} + +static void draw_byte(unsigned char c, long locX, long locY) +{ + unsigned long offset = reloc_offset(); + boot_infos_t* bi = PTRRELOC(&disp_bi); + unsigned char *base = calc_base(bi, locX << 3, locY << 4); + unsigned char *font = PTRRELOC(&vga_font[((unsigned int)c) * 16]); + int rb = bi->dispDeviceRowBytes; + +#if 0 + switch(bi->dispDeviceDepth) { + case 24: + case 32: + draw_byte_32(font, (unsigned int *)base, rb); + break; + case 15: + case 16: + draw_byte_16(font, (unsigned int *)base, rb); + break; + case 8: + draw_byte_8(font, (unsigned int *)base, rb); + break; + } +#else + if(bi->dispDeviceDepth == 24 || + bi->dispDeviceDepth == 32) { + draw_byte_32(font, (unsigned int *)base, rb); + } else if(bi->dispDeviceDepth == 15 || + bi->dispDeviceDepth == 16) { + draw_byte_16(font, (unsigned int *)base, rb); + } else if(bi->dispDeviceDepth == 8) { + draw_byte_8(font, (unsigned int *)base, rb); + } +#endif +} + +static unsigned int expand_bits_8[16] = { + 0x00000000, + 0x000000ff, + 0x0000ff00, + 0x0000ffff, + 0x00ff0000, + 0x00ff00ff, + 0x00ffff00, + 0x00ffffff, + 0xff000000, + 0xff0000ff, + 0xff00ff00, + 0xff00ffff, + 0xffff0000, + 0xffff00ff, + 0xffffff00, + 0xffffffff +}; + +static unsigned int expand_bits_16[4] = { + 0x00000000, + 0x0000ffff, + 0xffff0000, + 0xffffffff +}; + + +static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0xFFFFFFFFUL; + int bg = 0x00000000UL; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (-(bits >> 7) & fg) ^ bg; + base[1] = (-((bits >> 6) & 1) & fg) ^ bg; + base[2] = (-((bits >> 5) & 1) & fg) ^ bg; + base[3] = (-((bits >> 4) & 1) & fg) ^ bg; + base[4] = (-((bits >> 3) & 1) & fg) ^ bg; + base[5] = (-((bits >> 2) & 1) & fg) ^ bg; + base[6] = (-((bits >> 1) & 1) & fg) ^ bg; + base[7] = (-(bits & 1) & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static void draw_byte_16(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0xFFFFFFFFUL; + int bg = 0x00000000UL; + unsigned long offset = reloc_offset(); + unsigned int *eb = PTRRELOC((int *)expand_bits_16); + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (eb[bits >> 6] & fg) ^ bg; + base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg; + base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg; + base[3] = (eb[bits & 3] & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static void draw_byte_8(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0x0F0F0F0FUL; + int bg = 0x00000000UL; + unsigned long offset = reloc_offset(); + unsigned int *eb = PTRRELOC((int *)expand_bits_8); + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (eb[bits >> 4] & fg) ^ bg; + base[1] = (eb[bits & 0xf] & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static unsigned char vga_font[cmapsz] = { +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, +0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff, +0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, +0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, +0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, +0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, +0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, +0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, +0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e, +0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, +0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63, +0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8, +0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e, +0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, +0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb, +0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, +0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, +0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, +0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, +0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, +0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, +0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c, +0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, +0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, +0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, +0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18, +0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, +0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, +0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, +0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18, +0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, +0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, +0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, +0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde, +0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, +0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, +0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c, +0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, +0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, +0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c, +0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, +0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7, +0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, +0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, +0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, +0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, +0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, +0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, +0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, +0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, +0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, +0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, +0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb, +0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, +0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60, +0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30, +0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, +0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18, +0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, +0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, +0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00, +0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, +0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, +0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, +0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06, +0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, +0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, +0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18, +0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66, +0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, +0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00, +0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b, +0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c, +0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00, +0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, +0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, +0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00, +0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, +0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18, +0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, +0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00, +0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, +0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, +0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, +0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, +0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, +0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, +0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06, +0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, +0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, +0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, +0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44, +0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, +0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, +0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, +0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, +0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, +0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0, +0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, +0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, +0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, +0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, +0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, +0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, +0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, +0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, +0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, +0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, +0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, +0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, +0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, +0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c, +0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00, +0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, +}; --- linux-2.6.3-rc2/arch/ppc64/kernel/chrp_setup.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/chrp_setup.c 2004-02-12 00:39:25.000000000 -0800 @@ -67,9 +67,10 @@ void chrp_progress(char *, unsigned short); -extern void openpic_init_IRQ(void); +extern void pSeries_init_openpic(void); extern void find_and_init_phbs(void); +extern void pSeries_final_fixup(void); extern void pSeries_get_boot_time(struct rtc_time *rtc_time); extern void pSeries_get_rtc_time(struct rtc_time *rtc_time); @@ -178,6 +179,10 @@ chrp_setup_arch(void) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif + +#ifdef CONFIG_PPC_PSERIES + pSeries_nvram_init(); +#endif } void __init @@ -252,7 +257,7 @@ chrp_init(unsigned long r3, unsigned lon ppc_md.setup_residual = NULL; ppc_md.get_cpuinfo = chrp_get_cpuinfo; if(naca->interrupt_controller == IC_OPEN_PIC) { - ppc_md.init_IRQ = openpic_init_IRQ; + ppc_md.init_IRQ = pSeries_init_openpic; ppc_md.get_irq = openpic_get_irq; } else { ppc_md.init_IRQ = xics_init_IRQ; @@ -261,6 +266,8 @@ chrp_init(unsigned long r3, unsigned lon ppc_md.init = chrp_init2; + ppc_md.pcibios_fixup = pSeries_final_fixup; + ppc_md.restart = rtas_restart; ppc_md.power_off = rtas_power_off; ppc_md.halt = rtas_halt; @@ -272,9 +279,6 @@ chrp_init(unsigned long r3, unsigned lon ppc_md.progress = chrp_progress; - ppc_md.nvram_read = pSeries_nvram_read; - ppc_md.nvram_write = pSeries_nvram_write; - /* Build up the firmware_features bitmask field * using contents of device-tree/ibm,hypertas-functions. * Ultimately this functionality may be moved into prom.c prom_init(). --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/cpu_setup_power4.S 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,179 @@ +/* + * This file contains low level CPU setup functions. + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +_GLOBAL(__power4_cpu_preinit) + /* + * On the PPC970, we have to turn off real-mode cache inhibit + * early, before we first turn the MMU off. + */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi r0,0x39 + bnelr + + li r0,0 + sync + mtspr SPRN_HID4,r0 + isync + sync + mtspr SPRN_HID5,r0 + isync + + mfspr r0,SPRN_HID1 + li r11,0x1200 /* enable i-fetch cacheability */ + sldi r11,r11,44 /* and prefetch */ + or r0,r0,r11 + mtspr SPRN_HID1,r0 + mtspr SPRN_HID1,r0 + isync + li r0,0 + sync + mtspr SPRN_HIOR,0 /* Clear interrupt prefix */ + isync + blr + +_GLOBAL(__setup_cpu_power4) + blr + +_GLOBAL(__setup_cpu_ppc970) + mfspr r0,SPRN_HID0 + li r11,5 /* clear DOZE and SLEEP */ + rldimi r0,r11,52,8 /* set NAP and DPM */ + mtspr SPRN_HID0,r0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + sync + isync + blr + +/* Definitions for the table use to save CPU states */ +#define CS_HID0 0 +#define CS_HID1 8 +#define CS_HID4 16 +#define CS_HID5 24 +#define CS_SIZE 32 + + .data + .balign L1_CACHE_BYTES,0 +cpu_state_storage: + .space CS_SIZE + .balign L1_CACHE_BYTES,0 + .text + +/* Called in normal context to backup CPU 0 state. This + * does not include cache settings. This function is also + * called for machine sleep. This does not include the MMU + * setup, BATs, etc... but rather the "special" registers + * like HID0, HID1, HID4, etc... + */ +_GLOBAL(__save_cpu_setup) + /* Some CR fields are volatile, we back it up all */ + mfcr r7 + + /* Get storage ptr */ + LOADADDR(r5,cpu_state_storage) + + /* We only deal with 970 for now */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi r0,0x39 + bne 1f + + /* Save HID0,1,4 and 5 */ + mfspr r3,SPRN_HID0 + std r3,CS_HID0(r5) + mfspr r3,SPRN_HID1 + std r3,CS_HID1(r5) + mfspr r3,SPRN_HID4 + std r3,CS_HID4(r5) + mfspr r3,SPRN_HID5 + std r3,CS_HID5(r5) + +1: + mtcr r7 + blr + +/* Called with no MMU context (typically MSR:IR/DR off) to + * restore CPU state as backed up by the previous + * function. This does not include cache setting + */ +_GLOBAL(__restore_cpu_setup) + /* Get storage ptr (FIXME when using anton reloc as we + * are running with translation disabled here + */ + LOADADDR(r5,cpu_state_storage) + + /* We only deal with 970 for now */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi r0,0x39 + bne 1f + + /* Clear interrupt prefix */ + li r0,0 + sync + mtspr SPRN_HIOR,0 + isync + + /* Restore HID0 */ + ld r3,CS_HID0(r5) + sync + isync + mtspr SPRN_HID0,r3 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + mfspr r3,SPRN_HID0 + sync + isync + + /* Restore HID1 */ + ld r3,CS_HID1(r5) + sync + isync + mtspr SPRN_HID1,r3 + mtspr SPRN_HID1,r3 + sync + isync + + /* Restore HID4 */ + ld r3,CS_HID4(r5) + sync + isync + mtspr SPRN_HID4,r3 + sync + isync + + /* Restore HID5 */ + ld r3,CS_HID5(r5) + sync + isync + mtspr SPRN_HID5,r3 + sync + isync +1: + blr + --- linux-2.6.3-rc2/arch/ppc64/kernel/cputable.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/cputable.c 2004-02-12 00:39:25.000000000 -0800 @@ -30,6 +30,7 @@ struct cpu_spec* cur_cpu_spec = NULL; */ extern void __setup_cpu_power3(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_power4(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); /* We only set the altivec features if the kernel was compiled with altivec @@ -119,10 +120,10 @@ struct cpu_spec cpu_specs[] = { { /* PPC970 */ 0xffff0000, 0x00390000, "PPC970", CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP, + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP, COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, 128, 128, - __setup_cpu_power4, + __setup_cpu_ppc970, COMMON_PPC64_FW }, { /* Power5 */ --- linux-2.6.3-rc2/arch/ppc64/kernel/head.S 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc64/kernel/head.S 2004-02-12 00:39:25.000000000 -0800 @@ -1320,6 +1320,16 @@ _GLOBAL(__start_initialization_iSeries) #endif #ifdef CONFIG_PPC_PSERIES + +_STATIC(mmu_off) + mfmsr r3 + andi. r0,r3,MSR_IR|MSR_DR + beqlr + andc r3,r3,r0 + mtspr SRR0,r4 + mtspr SRR1,r3 + sync + rfid _GLOBAL(__start_initialization_pSeries) mr r31,r3 /* save parameters */ mr r30,r4 @@ -1339,33 +1349,27 @@ _GLOBAL(__start_initialization_pSeries) /* Relocate the TOC from a virt addr to a real addr */ sub r2,r2,r3 - /* DRENG / PPPBBB Fix the following comment!!! -Peter */ - /* The following copies the first 0x100 bytes of code from the */ - /* load addr to physical addr 0x0. This code causes secondary */ - /* processors to spin until a flag in the PACA is set. This */ - /* is done at this time rather than with the entire kernel */ - /* relocation which is done below because we need to cause the */ - /* processors to spin on code that is not going to move while OF */ - /* is still alive. Although the spin code is not actually run on */ - /* a uniprocessor, we always do this copy. */ - SET_REG_TO_CONST(r4, KERNELBASE)/* Src addr */ - sub r4,r4,r3 /* current address of __start */ - /* the source addr */ - li r3,0 /* Dest addr */ - li r5,0x100 /* # bytes of memory to copy */ - li r6,0 /* Destination offset */ - bl .copy_and_flush /* copy the first 0x100 bytes */ - + /* Save parameters */ mr r3,r31 mr r4,r30 mr r5,r29 mr r6,r28 mr r7,r27 + /* Do all of the interaction with OF client interface */ bl .prom_init + mr r23,r3 /* Save phys address we are running at */ + + /* Setup some critical 970 SPRs before switching MMU off */ + bl .__power4_cpu_preinit li r24,0 /* cpu # */ + /* Switch off MMU if not already */ + LOADADDR(r4, .__after_prom_start - KERNELBASE) + add r4,r4,r23 + bl .mmu_off + /* * At this point, r3 contains the physical address we are running at, * returned by prom_init() @@ -1390,6 +1394,7 @@ _STATIC(__after_prom_start) li r3,0 /* target addr */ + // XXX FIXME: Use phys returned by OF (r23) sub r4,r27,r26 /* source addr */ /* current address of _start */ /* i.e. where we are running */ @@ -1425,7 +1430,7 @@ _STATIC(__after_prom_start) * * Note: this routine *only* clobbers r0, r6 and lr */ -_STATIC(copy_and_flush) +_GLOBAL(copy_and_flush) addi r5,r5,-8 addi r6,r6,-8 4: li r0,16 /* Use the least common */ @@ -1675,6 +1680,58 @@ _GLOBAL(giveup_altivec) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_SMP +#ifdef CONFIG_PPC_PMAC +/* + * On PowerMac, secondary processors starts from the reset vector, which + * is temporarily turned into a call to one of the functions below. + */ + .section ".text"; + .align 2 ; + + .globl pmac_secondary_start_1 +pmac_secondary_start_1: + li r24, 1 + b .pmac_secondary_start + + .globl pmac_secondary_start_2 +pmac_secondary_start_2: + li r24, 2 + b .pmac_secondary_start + + .globl pmac_secondary_start_3 +pmac_secondary_start_3: + li r24, 3 + b .pmac_secondary_start + +_GLOBAL(pmac_secondary_start) + /* turn on 64-bit mode */ + bl .enable_64b_mode + isync + + /* Copy some CPU settings from CPU 0 */ + bl .__restore_cpu_setup + + /* pSeries do that early though I don't think we really need it */ + mfmsr r3 + ori r3,r3,MSR_RI + mtmsrd r3 /* RI on */ + + /* Set up a paca value for this processor. */ + LOADADDR(r4, paca) /* Get base vaddr of paca array */ + mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ + add r13,r13,r4 /* for this processor. */ + mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ + + /* Create a temp kernel stack for use before relocation is on. */ + mr r1,r13 + addi r1,r1,PACAGUARD + addi r1,r1,0x1000 + subi r1,r1,STACK_FRAME_OVERHEAD + + b .__secondary_start + +#endif /* CONFIG_PPC_PMAC */ + /* * This function is called after the master CPU has released the * secondary processors. The execution environment is relocation off. @@ -1870,6 +1927,12 @@ _STATIC(start_here_pSeries) li r0,0 stdu r0,-STACK_FRAME_OVERHEAD(r1) + /* set up the TOC (physical address) */ + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + sub r2,r2,r26 + LOADADDR(r3,cpu_specs) sub r3,r3,r26 LOADADDR(r4,cur_cpu_spec) @@ -1877,12 +1940,6 @@ _STATIC(start_here_pSeries) mr r5,r26 bl .identify_cpu - /* set up the TOC (physical address) */ - LOADADDR(r2,__toc_start) - addi r2,r2,0x4000 - addi r2,r2,0x4000 - sub r2,r2,r26 - /* Get the pointer to the segment table which is used by */ /* stab_initialize */ LOADADDR(r27, boot_cpuid) @@ -1926,7 +1983,8 @@ _STATIC(start_here_pSeries) li r3,SYSTEMCFG_PHYS_ADDR /* r3 = ptr to systemcfg */ lwz r3,PLATFORM(r3) /* r3 = platform flags */ - cmpldi r3,PLATFORM_PSERIES + /* Test if bit 0 is set (LPAR bit) */ + andi. r3,r3,0x1 bne 98f LOADADDR(r6,_SDR1) /* Only if NOT LPAR */ sub r6,r6,r26 @@ -1938,11 +1996,12 @@ _STATIC(start_here_pSeries) mtspr SRR0,r3 mtspr SRR1,r4 rfid -#endif /* CONFIG_PPC_PSERIES */ - +#endif /* CONFIG_PPC_PSERIES */ + /* This is where all platforms converge execution */ _STATIC(start_here_common) - + /* relocation is on at this point */ + /* The following code sets up the SP and TOC now that we are */ /* running with translation enabled. */ @@ -2013,8 +2072,6 @@ _STATIC(start_here_common) _GLOBAL(__setup_cpu_power3) blr -_GLOBAL(__setup_cpu_power4) - blr _GLOBAL(hmt_init) #ifdef CONFIG_HMT --- linux-2.6.3-rc2/arch/ppc64/kernel/idle.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc64/kernel/idle.c 2004-02-12 00:39:25.000000000 -0800 @@ -42,6 +42,7 @@ extern long cede_processor(void); extern long poll_pending(void); +extern void power4_idle(void); int (*idle_loop)(void); @@ -279,6 +280,17 @@ int cpu_idle(void) return 0; } +int native_idle(void) +{ + while(1) { + if (!need_resched()) + power4_idle(); + if (need_resched()) + schedule(); + } + return 0; +} + int idle_setup(void) { #ifdef CONFIG_PPC_ISERIES @@ -297,6 +309,9 @@ int idle_setup(void) printk("idle = default_idle\n"); idle_loop = default_idle; } + } else if (systemcfg->platform == PLATFORM_POWERMAC) { + printk("idle = native_idle\n"); + idle_loop = native_idle; } else { printk("idle_setup: unknown platform, use default_idle\n"); idle_loop = default_idle; --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/idle_power4.S 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,79 @@ +/* + * This file contains the power_save function for 6xx & 7xxx CPUs + * rewritten in assembler + * + * Warning ! This code assumes that if your machine has a 750fx + * it will have PLL 1 set to low speed mode (used during NAP/DOZE). + * if this is not the case some additional changes will have to + * be done to check a runtime var (a bit like powersave-nap) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + + .text + +/* + * Here is the power_save_6xx function. This could eventually be + * split into several functions & changing the function pointer + * depending on the various features. + */ +_GLOBAL(power4_idle) +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) + /* We must dynamically check for the NAP feature as it + * can be cleared by CPU init after the fixups are done + */ + LOADBASE(r3,cur_cpu_spec) + ld r4,cur_cpu_spec@l(r3) + ld r4,CPU_SPEC_FEATURES(r4) + andi. r0,r4,CPU_FTR_CAN_NAP + beqlr + /* Now check if user or arch enabled NAP mode */ + LOADBASE(r3,powersave_nap) + lwz r4,powersave_nap@l(r3) + cmpi 0,r4,0 + beqlr + + /* Clear MSR:EE */ + mfmsr r7 + li r4,0 + ori r4,r4,MSR_EE + andc r0,r7,r4 + mtmsrd r0 + + /* Check current_thread_info()->flags */ + clrrdi r4,r1,THREAD_SHIFT + ld r4,TI_FLAGS(r4) + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + mtmsrd r7 /* out of line this ? */ + blr +1: + /* Go to NAP now */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + oris r7,r7,MSR_POW@h + sync + isync + mtmsrd r7 + isync + sync + blr + --- linux-2.6.3-rc2/arch/ppc64/kernel/irq.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/irq.c 2004-02-12 00:41:09.000000000 -0800 @@ -664,7 +664,7 @@ cpumask_t irq_affinity [NR_IRQS] = { [0 static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -702,7 +702,7 @@ static int irq_affinity_write_proc (stru static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3-rc2/arch/ppc64/kernel/iSeries_pci.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/iSeries_pci.c 2004-02-12 00:39:25.000000000 -0800 @@ -241,9 +241,9 @@ void iSeries_pcibios_init(void) } /* - * pcibios_final_fixup(void) + * iSeries_pci_final_fixup(void) */ -void __init pcibios_final_fixup(void) +void __init iSeries_pci_final_fixup(void) { struct pci_dev *pdev = NULL; struct iSeries_Device_Node *node; --- linux-2.6.3-rc2/arch/ppc64/kernel/iSeries_setup.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/iSeries_setup.c 2004-02-12 00:39:25.000000000 -0800 @@ -63,10 +63,11 @@ extern void tce_init_iSeries(void); static void build_iSeries_Memory_Map(void); static void setup_iSeries_cache_sizes(void); static void iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr); -void build_valid_hpte(unsigned long vsid, unsigned long ea, unsigned long pa, - pte_t *ptep, unsigned hpteflags, unsigned bolted); +extern void build_valid_hpte(unsigned long vsid, unsigned long ea, unsigned long pa, + pte_t *ptep, unsigned hpteflags, unsigned bolted); static void iSeries_setup_dprofile(void); -void iSeries_setup_arch(void); +extern void iSeries_setup_arch(void); +extern void iSeries_pci_final_fixup(void); /* Global Variables */ static unsigned long procFreqHz; @@ -318,6 +319,8 @@ void __init iSeries_init_early(void) ppc_md.get_irq = iSeries_get_irq; ppc_md.init = NULL; + ppc_md.pcibios_fixup = iSeries_pci_final_fixup; + ppc_md.restart = iSeries_restart; ppc_md.power_off = iSeries_power_off; ppc_md.halt = iSeries_halt; --- linux-2.6.3-rc2/arch/ppc64/kernel/lparcfg.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc64/kernel/lparcfg.c 2004-02-12 00:40:49.000000000 -0800 @@ -134,7 +134,7 @@ static int lparcfg_data(unsigned char *b memset(buf, 0, size); shared = (int)(lpaca->xLpPacaPtr->xSharedProc); - n += snprintf(buf, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf, LPARCFG_BUFF_SIZE - n, "serial_number=%c%c%c%c%c%c%c\n", e2a(xItExtVpdPanel.mfgID[2]), e2a(xItExtVpdPanel.mfgID[3]), @@ -144,7 +144,7 @@ static int lparcfg_data(unsigned char *b e2a(xItExtVpdPanel.systemSerial[4]), e2a(xItExtVpdPanel.systemSerial[5])); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_type=%c%c%c%c\n", e2a(xItExtVpdPanel.machineType[0]), e2a(xItExtVpdPanel.machineType[1]), @@ -152,23 +152,23 @@ static int lparcfg_data(unsigned char *b e2a(xItExtVpdPanel.machineType[3])); lp_index = HvLpConfig_getLpIndex(); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_id=%d\n", (int)lp_index); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_active_processors=%d\n", (int)HvLpConfig_getSystemPhysicalProcessors()); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_potential_processors=%d\n", (int)HvLpConfig_getSystemPhysicalProcessors()); processors = (int)HvLpConfig_getPhysicalProcessors(); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_active_processors=%d\n", processors); max_processors = (int)HvLpConfig_getMaxPhysicalProcessors(); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_potential_processors=%d\n", max_processors); if(shared) { @@ -178,22 +178,22 @@ static int lparcfg_data(unsigned char *b entitled_capacity = processors * 100; max_entitled_capacity = max_processors * 100; } - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_entitled_capacity=%d\n", entitled_capacity); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_max_entitled_capacity=%d\n", max_entitled_capacity); if(shared) { pool_id = HvLpConfig_getSharedPoolIndex(); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, "pool=%d\n", + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "pool=%d\n", (int)pool_id); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "pool_capacity=%d\n", (int)(HvLpConfig_getNumProcsInSharedPool(pool_id)*100)); } - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "shared_processor_mode=%d\n", shared); return 0; @@ -297,13 +297,13 @@ static int lparcfg_data(unsigned char *b if(lp_index_ptr) lp_index = *lp_index_ptr; } - n = snprintf(buf, LPARCFG_BUFF_SIZE - n, + n = scnprintf(buf, LPARCFG_BUFF_SIZE - n, "serial_number=%s\n", system_id); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_type=%s\n", model); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_id=%d\n", (int)lp_index); rtas_node = find_path_device("/rtas"); @@ -317,74 +317,74 @@ static int lparcfg_data(unsigned char *b if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { h_get_ppp(&h_entitled,&h_unallocated,&h_aggregation,&h_resource); #ifdef DEBUG - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "R4=0x%lx\n", h_entitled); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "R5=0x%lx\n", h_unallocated); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "R6=0x%lx\n", h_aggregation); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "R7=0x%lx\n", h_resource); #endif /* DEBUG */ } if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { system_potential_processors = get_splpar_potential_characteristics(); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_active_processors=%ld\n", (h_resource >> 2*8) & 0xffff); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_potential_processors=%d\n", system_potential_processors); } else { system_potential_processors = system_active_processors; - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_active_processors=%d\n", system_active_processors); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "system_potential_processors=%d\n", system_potential_processors); } processors = systemcfg->processorCount; - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_active_processors=%d\n", processors); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_potential_processors=%d\n", system_active_processors); /* max_entitled_capacity will come out of get_splpar_potential_characteristics() when that function is complete */ max_entitled_capacity = system_active_processors * 100; if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_entitled_capacity=%ld\n", h_entitled); } else { - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_entitled_capacity=%d\n", system_active_processors*100); } - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "partition_max_entitled_capacity=%d\n", max_entitled_capacity); shared = 0; - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "shared_processor_mode=%d\n", shared); if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "pool=%ld\n", (h_aggregation >> 0*8)&0xffff); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "pool_capacity=%ld\n", (h_resource >> 3*8) &0xffff); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "group=%ld\n", (h_aggregation >> 2*8)&0xffff); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "capped=%ld\n", (h_resource >> 6*8)&0x40); - n += snprintf(buf+n, LPARCFG_BUFF_SIZE - n, + n += scnprintf(buf+n, LPARCFG_BUFF_SIZE - n, "capacity_weight=%d\n", (int)(h_resource>>5*8)&0xFF); } return 0; --- linux-2.6.3-rc2/arch/ppc64/kernel/Makefile 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc64/kernel/Makefile 2004-02-12 00:39:25.000000000 -0800 @@ -10,9 +10,11 @@ obj-y := setup.o entry.o t align.o semaphore.o bitops.o stab.o pacaData.o \ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ ptrace32.o signal32.o pmc.o rtc.o init_task.o \ - lmb.o cputable.o + lmb.o cputable.o cpu_setup_power4.o idle_power4.o -obj-$(CONFIG_PCI) += pci.o pci_dn.o pci_dma.o +obj-$(CONFIG_PPC_OF) += of_device.o + +obj-$(CONFIG_PCI) += pci.o pci_dn.o pci_dma.o pci_dma_direct.o ifdef CONFIG_PPC_ISERIES obj-$(CONFIG_PCI) += iSeries_pci.o iSeries_pci_reset.o \ @@ -27,7 +29,7 @@ obj-$(CONFIG_PPC_ISERIES) += iSeries_irq proc_pmc.o obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \ - eeh.o nvram.o rtasd.o ras.o \ + eeh.o nvram.o pSeries_nvram.o rtasd.o ras.o \ open_pic.o xics.o pSeries_htab.o rtas.o \ chrp_setup.o i8259.o prom.o vio.o @@ -39,6 +41,14 @@ obj-$(CONFIG_PPC_RTAS) += rtas-proc.o obj-$(CONFIG_SCANLOG) += scanlog.o obj-$(CONFIG_VIOPATH) += viopath.o obj-$(CONFIG_LPARCFG) += lparcfg.o -obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o +obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o +obj-$(CONFIG_BOOTX_TEXT) += btext.o + +obj-$(CONFIG_PPC_PMAC) += pmac_setup.o pmac_feature.o pmac_pci.o \ + pmac_time.o pmac_nvram.o pmac_low_i2c.o \ + open_pic_u3.o +ifdef CONFIG_SMP +obj-$(CONFIG_PPC_PMAC) += pmac_smp.o smp-tbsync.o +endif CFLAGS_ioctl32.o += -Ifs/ --- linux-2.6.3-rc2/arch/ppc64/kernel/misc.S 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/misc.S 2004-02-12 00:39:25.000000000 -0800 @@ -763,7 +763,7 @@ _GLOBAL(sys_call_table32) .llong .sys_fstat64 .llong .sys32_pciconfig_read .llong .sys32_pciconfig_write - .llong .sys_ni_syscall /* 200 - old pciconfig_iobase */ + .llong .sys32_pciconfig_iobase /* 200 - pciconfig_iobase */ .llong .sys_ni_syscall /* reserved for MacOnLinux */ .llong .sys_getdents64 .llong .sys_pivot_root @@ -1022,7 +1022,7 @@ _GLOBAL(sys_call_table) .llong .sys_ni_syscall /* 32bit only fstat64 */ .llong .sys_ni_syscall /* 32bit only pciconfig_read */ .llong .sys_ni_syscall /* 32bit only pciconfig_write */ - .llong .sys_ni_syscall /* 200 - old pciconfig_iobase */ + .llong .sys_ni_syscall /* 32bit only pciconfig_iobase */ .llong .sys_ni_syscall /* reserved for MacOnLinux */ .llong .sys_getdents64 .llong .sys_pivot_root --- linux-2.6.3-rc2/arch/ppc64/kernel/nvram.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/nvram.c 2004-02-12 00:39:25.000000000 -0800 @@ -9,6 +9,10 @@ * /dev/nvram driver for PPC64 * * This perhaps should live in drivers/char + * + * TODO: Split the /dev/nvram part (that one can use + * drivers/char/generic_nvram.c) from the arch & partition + * parsing code. */ #include @@ -34,16 +38,10 @@ static int nvram_scan_partitions(void); static int nvram_setup_partition(void); static int nvram_create_os_partition(void); static int nvram_remove_os_partition(void); -static unsigned char nvram_checksum(struct nvram_header *p); -static int nvram_write_header(struct nvram_partition * part); -static unsigned int nvram_size; -static unsigned int nvram_fetch, nvram_store; -static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ static struct nvram_partition * nvram_part; static long nvram_error_log_index = -1; static long nvram_error_log_size = 0; -static spinlock_t nvram_lock = SPIN_LOCK_UNLOCKED; volatile int no_more_logging = 1; /* Until we initialize everything, * make sure we don't try logging @@ -58,12 +56,18 @@ struct err_log_info { static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin) { + int size; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + size = ppc_md.nvram_size(); + switch (origin) { case 1: offset += file->f_pos; break; case 2: - offset += nvram_size; + offset += size; break; } if (offset < 0) @@ -78,13 +82,18 @@ static ssize_t dev_nvram_read(struct fil { ssize_t len; char *tmp_buffer; + int size; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + size = ppc_md.nvram_size(); if (verify_area(VERIFY_WRITE, buf, count)) return -EFAULT; - if (*ppos >= nvram_size) + if (*ppos >= size) return 0; - if (count > nvram_size) - count = nvram_size; + if (count > size) + count = size; tmp_buffer = (char *) kmalloc(count, GFP_KERNEL); if (!tmp_buffer) { @@ -113,13 +122,18 @@ static ssize_t dev_nvram_write(struct fi { ssize_t len; char * tmp_buffer; + int size; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + size = ppc_md.nvram_size(); if (verify_area(VERIFY_READ, buf, count)) return -EFAULT; - if (*ppos >= nvram_size) + if (*ppos >= size) return 0; - if (count > nvram_size) - count = nvram_size; + if (count > size) + count = size; tmp_buffer = (char *) kmalloc(count, GFP_KERNEL); if (!tmp_buffer) { @@ -145,6 +159,28 @@ static ssize_t dev_nvram_write(struct fi static int dev_nvram_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { + switch(cmd) { +#ifdef CONFIG_PPC_PMAC + case OBSOLETE_PMAC_NVRAM_GET_OFFSET: + printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n"); + case IOC_NVRAM_GET_OFFSET: { + int part, offset; + + if (systemcfg->platform != PLATFORM_POWERMAC) + return -EINVAL; + if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0) + return -EFAULT; + if (part < pmac_nvram_OF || part > pmac_nvram_NR) + return -EINVAL; + offset = pmac_get_partition(part); + if (offset < 0) + return offset; + if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0) + return -EFAULT; + return 0; + } +#endif /* CONFIG_PPC_PMAC */ + } return -EINVAL; } @@ -162,259 +198,75 @@ static struct miscdevice nvram_dev = { &nvram_fops }; -ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) -{ - unsigned int i; - unsigned long len, done; - unsigned long flags; - char *p = buf; - - if (*index >= nvram_size) - return 0; - - i = *index; - if (i + count > nvram_size) - count = nvram_size - i; - - spin_lock_irqsave(&nvram_lock, flags); - - for (; count != 0; count -= len) { - len = count; - if (len > NVRW_CNT) - len = NVRW_CNT; - - if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf), - len) != 0) || len != done) { - spin_unlock_irqrestore(&nvram_lock, flags); - return -EIO; - } - - memcpy(p, nvram_buf, len); - p += len; - i += len; - } - spin_unlock_irqrestore(&nvram_lock, flags); +static void nvram_print_partitions(char * label) +{ + struct list_head * p; + struct nvram_partition * tmp_part; - *index = i; - return p - buf; + printk(KERN_WARNING "--------%s---------\n", label); + printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n"); + list_for_each(p, &nvram_part->partition) { + tmp_part = list_entry(p, struct nvram_partition, partition); + printk(KERN_WARNING "%d \t%02x\t%02x\t%d\t%s\n", + tmp_part->index, tmp_part->header.signature, + tmp_part->header.checksum, tmp_part->header.length, + tmp_part->header.name); + } } -ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index) -{ - unsigned int i; - unsigned long len, done; - unsigned long flags; - const char *p = buf; - - if (*index >= nvram_size) - return 0; - i = *index; - if (i + count > nvram_size) - count = nvram_size - i; - - spin_lock_irqsave(&nvram_lock, flags); - - for (; count != 0; count -= len) { - len = count; - if (len > NVRW_CNT) - len = NVRW_CNT; - - memcpy(nvram_buf, p, len); - - if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf), - len) != 0) || len != done) { - spin_unlock_irqrestore(&nvram_lock, flags); - return -EIO; - } - - p += len; - i += len; - } - spin_unlock_irqrestore(&nvram_lock, flags); - - *index = i; - return p - buf; -} - -int __init nvram_init(void) +static int nvram_write_header(struct nvram_partition * part) { - struct device_node *nvram; - unsigned int *nbytes_p, proplen; - int error; + loff_t tmp_index; int rc; - if ((nvram = of_find_node_by_type(NULL, "nvram")) != NULL) { - nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen); - if (nbytes_p && proplen == sizeof(unsigned int)) { - nvram_size = *nbytes_p; - } else { - return -EIO; - } - } - nvram_fetch = rtas_token("nvram-fetch"); - nvram_store = rtas_token("nvram-store"); - printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size); - of_node_put(nvram); - - rc = misc_register(&nvram_dev); - - /* If we don't know how big NVRAM is then we shouldn't touch - the nvram partitions */ - if (nvram == NULL) { - return rc; - } - - /* initialize our anchor for the nvram partition list */ - nvram_part = (struct nvram_partition *) kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); - if (!nvram_part) { - printk(KERN_ERR "nvram_init: Failed kmalloc\n"); - return -ENOMEM; - } - INIT_LIST_HEAD(&nvram_part->partition); - - /* Get all the NVRAM partitions */ - error = nvram_scan_partitions(); - if (error) { - printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n"); - return error; - } - - if(nvram_setup_partition()) - printk(KERN_WARNING "nvram_init: Could not find nvram partition" - " for nvram buffered error logging.\n"); - -#ifdef DEBUG_NVRAM - nvram_print_partitions("NVRAM Partitions"); -#endif + tmp_index = part->index; + rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index); - return rc; + return rc; } -void __exit nvram_cleanup(void) -{ - misc_deregister( &nvram_dev ); -} -static int nvram_scan_partitions(void) +static unsigned char nvram_checksum(struct nvram_header *p) { - loff_t cur_index = 0; - struct nvram_header phead; - struct nvram_partition * tmp_part; - unsigned char c_sum; - char * header; - long size; - - header = (char *) kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL); - if (!header) { - printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n"); - return -ENOMEM; - } - - while (cur_index < nvram_size) { - - size = ppc_md.nvram_read(header, NVRAM_HEADER_LEN, &cur_index); - if (size != NVRAM_HEADER_LEN) { - printk(KERN_ERR "nvram_scan_partitions: Error parsing " - "nvram partitions\n"); - kfree(header); - return size; - } - - cur_index -= NVRAM_HEADER_LEN; /* nvram_read will advance us */ - - memcpy(&phead, header, NVRAM_HEADER_LEN); - - c_sum = nvram_checksum(&phead); - if (c_sum != phead.checksum) - printk(KERN_WARNING "WARNING: nvram partition checksum " - "was %02x, should be %02x!\n", phead.checksum, c_sum); - - tmp_part = (struct nvram_partition *) - kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); - if (!tmp_part) { - printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n"); - kfree(header); - return -ENOMEM; - } - - memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN); - tmp_part->index = cur_index; - list_add_tail(&tmp_part->partition, &nvram_part->partition); - - cur_index += phead.length * NVRAM_BLOCK_LEN; - } + unsigned int c_sum, c_sum2; + unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */ + c_sum = p->signature + p->length + sp[0] + sp[1] + sp[2] + sp[3] + sp[4] + sp[5]; - kfree(header); - return 0; + /* The sum may have spilled into the 3rd byte. Fold it back. */ + c_sum = ((c_sum & 0xffff) + (c_sum >> 16)) & 0xffff; + /* The sum cannot exceed 2 bytes. Fold it into a checksum */ + c_sum2 = (c_sum >> 8) + (c_sum << 8); + c_sum = ((c_sum + c_sum2) >> 8) & 0xff; + return c_sum; } -/* nvram_setup_partition - * - * This will setup the partition we need for buffering the - * error logs and cleanup partitions if needed. - * - * The general strategy is the following: - * 1.) If there is ppc64,linux partition large enough then use it. - * 2.) If there is not a ppc64,linux partition large enough, search - * for a free partition that is large enough. - * 3.) If there is not a free partition large enough remove - * _all_ OS partitions and consolidate the space. - * 4.) Will first try getting a chunk that will satisfy the maximum - * error log size (NVRAM_MAX_REQ). - * 5.) If the max chunk cannot be allocated then try finding a chunk - * that will satisfy the minum needed (NVRAM_MIN_REQ). + +/* + * Find an nvram partition, sig can be 0 for any + * partition or name can be NULL for any name, else + * tries to match both */ -static int nvram_setup_partition(void) +struct nvram_partition *nvram_find_partition(int sig, const char *name) { - struct list_head * p; struct nvram_partition * part; - int rc; + struct list_head * p; - /* see if we have an OS partition that meets our needs. - will try getting the max we need. If not we'll delete - partitions and try again. */ list_for_each(p, &nvram_part->partition) { part = list_entry(p, struct nvram_partition, partition); - if (part->header.signature != NVRAM_SIG_OS) - continue; - if (strcmp(part->header.name, "ppc64,linux")) + if (sig && part->header.signature != sig) continue; - - if (part->header.length >= NVRAM_MIN_REQ) { - /* found our partition */ - nvram_error_log_index = part->index + NVRAM_HEADER_LEN; - nvram_error_log_size = ((part->header.length - 1) * - NVRAM_BLOCK_LEN) - sizeof(struct err_log_info); - return 0; - } - } - - /* try creating a partition with the free space we have */ - rc = nvram_create_os_partition(); - if (!rc) { - return 0; - } - - /* need to free up some space */ - rc = nvram_remove_os_partition(); - if (rc) { - return rc; - } - - /* create a partition in this new space */ - rc = nvram_create_os_partition(); - if (rc) { - printk(KERN_ERR "nvram_create_os_partition: Could not find a " - "NVRAM partition large enough\n"); - return rc; + if (name && 0 != strncmp(name, part->header.name, 12)) + continue; + return part; } - - return 0; + return NULL; } +EXPORT_SYMBOL(nvram_find_partition); + static int nvram_remove_os_partition(void) { @@ -572,22 +424,185 @@ static int nvram_create_os_partition(voi } -void nvram_print_partitions(char * label) +/* nvram_setup_partition + * + * This will setup the partition we need for buffering the + * error logs and cleanup partitions if needed. + * + * The general strategy is the following: + * 1.) If there is ppc64,linux partition large enough then use it. + * 2.) If there is not a ppc64,linux partition large enough, search + * for a free partition that is large enough. + * 3.) If there is not a free partition large enough remove + * _all_ OS partitions and consolidate the space. + * 4.) Will first try getting a chunk that will satisfy the maximum + * error log size (NVRAM_MAX_REQ). + * 5.) If the max chunk cannot be allocated then try finding a chunk + * that will satisfy the minum needed (NVRAM_MIN_REQ). + */ +static int nvram_setup_partition(void) { struct list_head * p; + struct nvram_partition * part; + int rc; + + /* For now, we don't do any of this on pmac, until I + * have figured out if it's worth killing some unused stuffs + * in our nvram, as Apple defined partitions use pretty much + * all of the space + */ + if (systemcfg->platform == PLATFORM_POWERMAC) + return -ENOSPC; + + /* see if we have an OS partition that meets our needs. + will try getting the max we need. If not we'll delete + partitions and try again. */ + list_for_each(p, &nvram_part->partition) { + part = list_entry(p, struct nvram_partition, partition); + if (part->header.signature != NVRAM_SIG_OS) + continue; + + if (strcmp(part->header.name, "ppc64,linux")) + continue; + + if (part->header.length >= NVRAM_MIN_REQ) { + /* found our partition */ + nvram_error_log_index = part->index + NVRAM_HEADER_LEN; + nvram_error_log_size = ((part->header.length - 1) * + NVRAM_BLOCK_LEN) - sizeof(struct err_log_info); + return 0; + } + } + + /* try creating a partition with the free space we have */ + rc = nvram_create_os_partition(); + if (!rc) { + return 0; + } + + /* need to free up some space */ + rc = nvram_remove_os_partition(); + if (rc) { + return rc; + } + + /* create a partition in this new space */ + rc = nvram_create_os_partition(); + if (rc) { + printk(KERN_ERR "nvram_create_os_partition: Could not find a " + "NVRAM partition large enough\n"); + return rc; + } + + return 0; +} + + +static int nvram_scan_partitions(void) +{ + loff_t cur_index = 0; + struct nvram_header phead; struct nvram_partition * tmp_part; + unsigned char c_sum; + char * header; + long size; + int total_size; + + if (ppc_md.nvram_size == NULL) + return -ENODEV; + total_size = ppc_md.nvram_size(); - printk(KERN_WARNING "--------%s---------\n", label); - printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n"); - list_for_each(p, &nvram_part->partition) { - tmp_part = list_entry(p, struct nvram_partition, partition); - printk(KERN_WARNING "%d \t%02x\t%02x\t%d\t%s\n", - tmp_part->index, tmp_part->header.signature, - tmp_part->header.checksum, tmp_part->header.length, - tmp_part->header.name); + header = (char *) kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL); + if (!header) { + printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n"); + return -ENOMEM; + } + + while (cur_index < total_size) { + + size = ppc_md.nvram_read(header, NVRAM_HEADER_LEN, &cur_index); + if (size != NVRAM_HEADER_LEN) { + printk(KERN_ERR "nvram_scan_partitions: Error parsing " + "nvram partitions\n"); + kfree(header); + return size; + } + + cur_index -= NVRAM_HEADER_LEN; /* nvram_read will advance us */ + + memcpy(&phead, header, NVRAM_HEADER_LEN); + + c_sum = nvram_checksum(&phead); + if (c_sum != phead.checksum) + printk(KERN_WARNING "WARNING: nvram partition checksum " + "was %02x, should be %02x!\n", phead.checksum, c_sum); + + tmp_part = (struct nvram_partition *) + kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + if (!tmp_part) { + printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n"); + kfree(header); + return -ENOMEM; + } + + memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN); + tmp_part->index = cur_index; + list_add_tail(&tmp_part->partition, &nvram_part->partition); + + cur_index += phead.length * NVRAM_BLOCK_LEN; } + + kfree(header); + return 0; } +static int __init nvram_init(void) +{ + int error; + int rc; + + if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0) + return -ENODEV; + + rc = misc_register(&nvram_dev); + if (rc != 0) { + printk(KERN_ERR "nvram_init: failed to register device\n"); + return rc; + } + + /* initialize our anchor for the nvram partition list */ + nvram_part = (struct nvram_partition *) kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + if (!nvram_part) { + printk(KERN_ERR "nvram_init: Failed kmalloc\n"); + return -ENOMEM; + } + INIT_LIST_HEAD(&nvram_part->partition); + + /* Get all the NVRAM partitions */ + error = nvram_scan_partitions(); + if (error) { + printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n"); + return error; + } + + if(nvram_setup_partition()) + printk(KERN_WARNING "nvram_init: Could not find nvram partition" + " for nvram buffered error logging.\n"); + +#ifdef DEBUG_NVRAM + nvram_print_partitions("NVRAM Partitions"); +#endif + + return rc; +} + +void __exit nvram_cleanup(void) +{ + misc_deregister( &nvram_dev ); +} + + + /* nvram_write_error_log * * We need to buffer the error logs into nvram to ensure that we have @@ -711,30 +726,6 @@ int nvram_clear_error_log() return 0; } -static int nvram_write_header(struct nvram_partition * part) -{ - loff_t tmp_index; - int rc; - - tmp_index = part->index; - rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index); - - return rc; -} - -static unsigned char nvram_checksum(struct nvram_header *p) -{ - unsigned int c_sum, c_sum2; - unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */ - c_sum = p->signature + p->length + sp[0] + sp[1] + sp[2] + sp[3] + sp[4] + sp[5]; - - /* The sum may have spilled into the 3rd byte. Fold it back. */ - c_sum = ((c_sum & 0xffff) + (c_sum >> 16)) & 0xffff; - /* The sum cannot exceed 2 bytes. Fold it into a checksum */ - c_sum2 = (c_sum >> 8) + (c_sum << 8); - c_sum = ((c_sum + c_sum2) >> 8) & 0xff; - return c_sum; -} module_init(nvram_init); module_exit(nvram_cleanup); --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/of_device.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,272 @@ +#include +#include +#include +#include +#include +#include +#include + +/** + * of_match_device - Tell if an of_device structure has a matching + * of_match structure + * @ids: array of of device match structures to search in + * @dev: the of device structure to match against + * + * Used by a driver to check whether an of_device present in the + * system is in its list of supported devices. + */ +const struct of_match * of_match_device(const struct of_match *matches, + const struct of_device *dev) +{ + if (!dev->node) + return NULL; + while (matches->name || matches->type || matches->compatible) { + int match = 1; + if (matches->name && matches->name != OF_ANY_MATCH) + match &= dev->node->name + && !strcmp(matches->name, dev->node->name); + if (matches->type && matches->type != OF_ANY_MATCH) + match &= dev->node->type + && !strcmp(matches->type, dev->node->type); + if (matches->compatible && matches->compatible != OF_ANY_MATCH) + match &= device_is_compatible(dev->node, + matches->compatible); + if (match) + return matches; + matches++; + } + return NULL; +} + +static int of_platform_bus_match(struct device *dev, struct device_driver *drv) +{ + struct of_device * of_dev = to_of_device(dev); + struct of_platform_driver * of_drv = to_of_platform_driver(drv); + const struct of_match * matches = of_drv->match_table; + + if (!matches) + return 0; + + return of_match_device(matches, of_dev) != NULL; +} + +struct of_device *of_dev_get(struct of_device *dev) +{ + struct device *tmp; + + if (!dev) + return NULL; + tmp = get_device(&dev->dev); + if (tmp) + return to_of_device(tmp); + else + return NULL; +} + +void of_dev_put(struct of_device *dev) +{ + if (dev) + put_device(&dev->dev); +} + + +static int of_device_probe(struct device *dev) +{ + int error = -ENODEV; + struct of_platform_driver *drv; + struct of_device *of_dev; + const struct of_match *match; + + drv = to_of_platform_driver(dev->driver); + of_dev = to_of_device(dev); + + if (!drv->probe) + return error; + + of_dev_get(of_dev); + + match = of_match_device(drv->match_table, of_dev); + if (match) + error = drv->probe(of_dev, match); + if (error) + of_dev_put(of_dev); + + return error; +} + +static int of_device_remove(struct device *dev) +{ + struct of_device * of_dev = to_of_device(dev); + struct of_platform_driver * drv = to_of_platform_driver(dev->driver); + + if (dev->driver && drv->remove) + drv->remove(of_dev); + return 0; +} + +static int of_device_suspend(struct device *dev, u32 state) +{ + struct of_device * of_dev = to_of_device(dev); + struct of_platform_driver * drv = to_of_platform_driver(dev->driver); + int error = 0; + + if (dev->driver && drv->suspend) + error = drv->suspend(of_dev, state); + return error; +} + +static int of_device_resume(struct device * dev) +{ + struct of_device * of_dev = to_of_device(dev); + struct of_platform_driver * drv = to_of_platform_driver(dev->driver); + int error = 0; + + if (dev->driver && drv->resume) + error = drv->resume(of_dev); + return error; +} + +struct bus_type of_platform_bus_type = { + .name = "of_platform", + .match = of_platform_bus_match, + .suspend = of_device_suspend, + .resume = of_device_resume, +}; + +static int __init of_bus_driver_init(void) +{ + return bus_register(&of_platform_bus_type); +} + +postcore_initcall(of_bus_driver_init); + +int of_register_driver(struct of_platform_driver *drv) +{ + int count = 0; + + /* initialize common driver fields */ + drv->driver.name = drv->name; + drv->driver.bus = &of_platform_bus_type; + drv->driver.probe = of_device_probe; + drv->driver.remove = of_device_remove; + + /* register with core */ + count = driver_register(&drv->driver); + return count ? count : 1; +} + +void of_unregister_driver(struct of_platform_driver *drv) +{ + driver_unregister(&drv->driver); +} + + +static ssize_t dev_show_devspec(struct device *dev, char *buf) +{ + struct of_device *ofdev; + + ofdev = to_of_device(dev); + return sprintf(buf, "%s", ofdev->node->full_name); +} + +static DEVICE_ATTR(devspec, S_IRUGO, dev_show_devspec, NULL); + +/** + * of_release_dev - free an of device structure when all users of it are finished. + * @dev: device that's been disconnected + * + * Will be called only by the device core when all users of this of device are + * done. + */ +void of_release_dev(struct device *dev) +{ + struct of_device *ofdev; + + ofdev = to_of_device(dev); + kfree(ofdev); +} + +int of_device_register(struct of_device *ofdev) +{ + int rc; + struct of_device **odprop; + + BUG_ON(ofdev->node == NULL); + + odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL); + if (!odprop) { + struct property *new_prop; + + new_prop = kmalloc(sizeof(struct property) + sizeof(struct of_device *), + GFP_KERNEL); + if (new_prop == NULL) + return -ENOMEM; + new_prop->name = "linux,device"; + new_prop->length = sizeof(sizeof(struct of_device *)); + new_prop->value = (unsigned char *)&new_prop[1]; + odprop = (struct of_device **)new_prop->value; + *odprop = NULL; + prom_add_property(ofdev->node, new_prop); + } + *odprop = ofdev; + + rc = device_register(&ofdev->dev); + if (rc) + return rc; + + device_create_file(&ofdev->dev, &dev_attr_devspec); + + return 0; +} + +void of_device_unregister(struct of_device *ofdev) +{ + struct of_device **odprop; + + device_remove_file(&ofdev->dev, &dev_attr_devspec); + + odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL); + if (odprop) + *odprop = NULL; + + device_unregister(&ofdev->dev); +} + +struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id) +{ + struct of_device *dev; + u32 *reg; + + dev = kmalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return NULL; + memset(dev, 0, sizeof(*dev)); + + dev->node = np; + dev->dma_mask = 0xffffffffUL; + dev->dev.dma_mask = &dev->dma_mask; + dev->dev.parent = NULL; + dev->dev.bus = &of_platform_bus_type; + dev->dev.release = of_release_dev; + + reg = (u32 *)get_property(np, "reg", NULL); + strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE); + + if (of_device_register(dev) != 0) { + kfree(dev); + return NULL; + } + + return dev; +} + +EXPORT_SYMBOL(of_match_device); +EXPORT_SYMBOL(of_platform_bus_type); +EXPORT_SYMBOL(of_register_driver); +EXPORT_SYMBOL(of_unregister_driver); +EXPORT_SYMBOL(of_device_register); +EXPORT_SYMBOL(of_device_unregister); +EXPORT_SYMBOL(of_dev_get); +EXPORT_SYMBOL(of_dev_put); +EXPORT_SYMBOL(of_platform_device_create); +EXPORT_SYMBOL(of_release_dev); --- linux-2.6.3-rc2/arch/ppc64/kernel/open_pic.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/open_pic.c 2004-02-12 00:39:25.000000000 -0800 @@ -34,7 +34,42 @@ static volatile struct OpenPIC *OpenPIC u_int OpenPIC_NumInitSenses __initdata = 0; u_char *OpenPIC_InitSenses __initdata = NULL; -void find_ISUs(void); +/* + * Local (static) OpenPIC Operations + */ + + +/* Global Operations */ +static void openpic_reset(void); +static void openpic_enable_8259_pass_through(void); +static void openpic_disable_8259_pass_through(void); +static u_int openpic_irq(void); +static void openpic_eoi(void); +static u_int openpic_get_priority(void); +static void openpic_set_priority(u_int pri); +static u_int openpic_get_spurious(void); +static void openpic_set_spurious(u_int vector); + +#ifdef CONFIG_SMP +/* Interprocessor Interrupts */ +static void openpic_initipi(u_int ipi, u_int pri, u_int vector); +static irqreturn_t openpic_ipi_action(int cpl, void *dev_id, + struct pt_regs *regs); +#endif + +/* Timer Interrupts */ +static void openpic_inittimer(u_int timer, u_int pri, u_int vector); +static void openpic_maptimer(u_int timer, u_int cpumask); + +/* Interrupt Sources */ +static void openpic_enable_irq(u_int irq); +static void openpic_disable_irq(u_int irq); +static void openpic_initirq(u_int irq, u_int pri, u_int vector, int polarity, + int is_level); +static void openpic_mapirq(u_int irq, u_int cpumask); +static void openpic_set_sense(u_int irq, int sense); + +static void find_ISUs(void); static u_int NumProcessors; static u_int NumSources; @@ -130,7 +165,7 @@ unsigned int openpic_vec_spurious; #define GET_ISU(source) ISU[(source) >> 4][(source) & 0xf] -void __init openpic_init_IRQ(void) +void __init pSeries_init_openpic(void) { struct device_node *np; int i; @@ -359,9 +394,12 @@ void __init openpic_init(int main_pic, i } /* Init all external sources */ - for (i = 1; i < NumSources; i++) { + for (i = 0; i < NumSources; i++) { int pri, sense; + /* skip cascade if any */ + if (offset && i == 0) + continue; /* the bootloader may have left it enabled (bad !) */ openpic_disable_irq(i+offset); @@ -396,6 +434,9 @@ void __init openpic_init(int main_pic, i */ static int __init openpic_setup_i8259(void) { + if (systemcfg->platform == PLATFORM_POWERMAC) + return 0; + if (naca->interrupt_controller == IC_OPEN_PIC) { /* Initialize the cascade */ if (request_irq(NUM_8259_INTERRUPTS, no_action, SA_INTERRUPT, @@ -419,6 +460,14 @@ void openpic_setup_ISU(int isu_num, unsi void find_ISUs(void) { + /* For PowerMac, setup ISUs on base openpic */ + if (systemcfg->platform == PLATFORM_POWERMAC) { + int i; + for (i=0; i<128; i+=0x10) { + ISU[i>>4] = &((struct OpenPIC *)OpenPIC_Addr)->Source[i]; + NumISUs++; + } + } /* Use /interrupt-controller/reg and * /interrupt-controller/interrupt-ranges from OF device tree * the ISU array is setup in chrp_pci.c in ibm_add_bridges @@ -429,11 +478,22 @@ void find_ISUs(void) /* basically each ISU is a bus, and this assumes that * open_pic_isu_count interrupts per bus are possible * ISU == Interrupt Source + * + * On G5, we keep the original NumSources provided by the controller, + * it's below 128, so we have room to stuff the IPIs and timers like darwin + * does. We put the spurrious vector up at 0xff though. */ - NumSources = NumISUs * 0x10; - openpic_vec_ipi = NumSources + open_pic_irq_offset; - openpic_vec_timer = openpic_vec_ipi + OPENPIC_NUM_IPI; - openpic_vec_spurious = openpic_vec_timer + OPENPIC_NUM_TIMERS; + if (systemcfg->platform == PLATFORM_POWERMAC) { + openpic_vec_ipi = NumSources; + openpic_vec_timer = openpic_vec_ipi + 4; + openpic_vec_spurious = 0xff; + } else { + NumSources = NumISUs * 0x10; + + openpic_vec_ipi = NumSources + open_pic_irq_offset; + openpic_vec_timer = openpic_vec_ipi + OPENPIC_NUM_IPI; + openpic_vec_spurious = openpic_vec_timer + OPENPIC_NUM_TIMERS; + } } static inline void openpic_reset(void) @@ -767,8 +827,7 @@ static inline void openpic_set_sense(u_i static void openpic_end_irq(unsigned int irq_nr) { - if ((irq_desc[irq_nr].status & IRQ_LEVEL) != 0) - openpic_eoi(); + openpic_eoi(); } static void openpic_set_affinity(unsigned int irq_nr, cpumask_t cpumask) @@ -807,9 +866,7 @@ int openpic_get_irq(struct pt_regs *regs int irq = openpic_irq(); - /* Management of the cascade should be moved out of here */ - if (open_pic_irq_offset && irq == open_pic_irq_offset) - { + if (open_pic_irq_offset && irq == open_pic_irq_offset) { /* * This magic address generates a PCI IACK cycle. */ --- linux-2.6.3-rc2/arch/ppc64/kernel/open_pic_defs.h 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/ppc64/kernel/open_pic_defs.h 2004-02-12 00:39:25.000000000 -0800 @@ -279,40 +279,6 @@ extern volatile struct OpenPIC *OpenPIC; #define Vector_Priority _Vector_Priority.Reg #define Destination _Destination.Reg -/* - * Local (static) OpenPIC Operations - */ - - -/* Global Operations */ -static void openpic_reset(void); -static void openpic_enable_8259_pass_through(void); -static void openpic_disable_8259_pass_through(void); -static u_int openpic_irq(void); -static void openpic_eoi(void); -static u_int openpic_get_priority(void); -static void openpic_set_priority(u_int pri); -static u_int openpic_get_spurious(void); -static void openpic_set_spurious(u_int vector); - -#ifdef CONFIG_SMP -/* Interprocessor Interrupts */ -static void openpic_initipi(u_int ipi, u_int pri, u_int vector); -static irqreturn_t openpic_ipi_action(int cpl, void *dev_id, - struct pt_regs *regs); -#endif - -/* Timer Interrupts */ -static void openpic_inittimer(u_int timer, u_int pri, u_int vector); -static void openpic_maptimer(u_int timer, u_int cpumask); - -/* Interrupt Sources */ -static void openpic_enable_irq(u_int irq); -static void openpic_disable_irq(u_int irq); -static void openpic_initirq(u_int irq, u_int pri, u_int vector, int polarity, - int is_level); -static void openpic_mapirq(u_int irq, u_int cpumask); -static void openpic_set_sense(u_int irq, int sense); #endif /* __KERNEL__ */ --- linux-2.6.3-rc2/arch/ppc64/kernel/open_pic.h 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/ppc64/kernel/open_pic.h 2004-02-12 00:39:25.000000000 -0800 @@ -40,6 +40,8 @@ extern void openpic_cause_IPI(u_int ipi, extern inline int openpic_to_irq(int irq) { + if (systemcfg->platform == PLATFORM_POWERMAC) + return irq; return irq += NUM_8259_INTERRUPTS; } /*extern int open_pic_irq_offset;*/ --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/open_pic_u3.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,348 @@ +/* + * arch/ppc/kernel/open_pic.c -- OpenPIC Interrupt Handling + * + * Copyright (C) 1997 Geert Uytterhoeven + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "open_pic.h" +#include "open_pic_defs.h" + +void* OpenPIC2_Addr; +static volatile struct OpenPIC *OpenPIC2 = NULL; + +extern u_int OpenPIC_NumInitSenses; +extern u_char *OpenPIC_InitSenses; + +static u_int NumSources; +static int NumISUs; +static int open_pic2_irq_offset; + +static OpenPIC_SourcePtr ISU2[OPENPIC_MAX_ISU]; + +unsigned int openpic2_vec_spurious; + +/* + * Accesses to the current processor's openpic registers + * U3 secondary openpic has only one output + */ +#define THIS_CPU Processor[0] +#define DECL_THIS_CPU +#define CHECK_THIS_CPU + +#define GET_ISU(source) ISU2[(source) >> 4][(source) & 0xf] + +static inline u_int openpic2_read(volatile u_int *addr) +{ + u_int val; + + val = in_be32(addr); + return val; +} + +static inline void openpic2_write(volatile u_int *addr, u_int val) +{ + out_be32(addr, val); +} + +static inline u_int openpic2_readfield(volatile u_int *addr, u_int mask) +{ + u_int val = openpic2_read(addr); + return val & mask; +} + +static inline void openpic2_writefield(volatile u_int *addr, u_int mask, + u_int field) +{ + u_int val = openpic2_read(addr); + openpic2_write(addr, (val & ~mask) | (field & mask)); +} + +static inline void openpic2_clearfield(volatile u_int *addr, u_int mask) +{ + openpic2_writefield(addr, mask, 0); +} + +static inline void openpic2_setfield(volatile u_int *addr, u_int mask) +{ + openpic2_writefield(addr, mask, mask); +} + +static void openpic2_safe_writefield(volatile u_int *addr, u_int mask, + u_int field) +{ + unsigned int loops = 100000; + + openpic2_setfield(addr, OPENPIC_MASK); + while (openpic2_read(addr) & OPENPIC_ACTIVITY) { + if (!loops--) { + printk(KERN_ERR "openpic2_safe_writefield timeout\n"); + break; + } + } + openpic2_writefield(addr, mask | OPENPIC_MASK, field | OPENPIC_MASK); +} + + +static inline void openpic2_reset(void) +{ + openpic2_setfield(&OpenPIC2->Global.Global_Configuration0, + OPENPIC_CONFIG_RESET); +} + +static void openpic2_disable_8259_pass_through(void) +{ + openpic2_setfield(&OpenPIC2->Global.Global_Configuration0, + OPENPIC_CONFIG_8259_PASSTHROUGH_DISABLE); +} + +/* + * Find out the current interrupt + */ +static u_int openpic2_irq(void) +{ + u_int vec; + DECL_THIS_CPU; + CHECK_THIS_CPU; + vec = openpic2_readfield(&OpenPIC2->THIS_CPU.Interrupt_Acknowledge, + OPENPIC_VECTOR_MASK); + return vec; +} + +static void openpic2_eoi(void) +{ + DECL_THIS_CPU; + CHECK_THIS_CPU; + openpic2_write(&OpenPIC2->THIS_CPU.EOI, 0); + /* Handle PCI write posting */ + (void)openpic2_read(&OpenPIC2->THIS_CPU.EOI); +} + + +static inline u_int openpic2_get_priority(void) +{ + DECL_THIS_CPU; + CHECK_THIS_CPU; + return openpic2_readfield(&OpenPIC2->THIS_CPU.Current_Task_Priority, + OPENPIC_CURRENT_TASK_PRIORITY_MASK); +} + +static void openpic2_set_priority(u_int pri) +{ + DECL_THIS_CPU; + CHECK_THIS_CPU; + openpic2_writefield(&OpenPIC2->THIS_CPU.Current_Task_Priority, + OPENPIC_CURRENT_TASK_PRIORITY_MASK, pri); +} + +/* + * Get/set the spurious vector + */ +static inline u_int openpic2_get_spurious(void) +{ + return openpic2_readfield(&OpenPIC2->Global.Spurious_Vector, + OPENPIC_VECTOR_MASK); +} + +static void openpic2_set_spurious(u_int vec) +{ + openpic2_writefield(&OpenPIC2->Global.Spurious_Vector, OPENPIC_VECTOR_MASK, + vec); +} + +/* + * Enable/disable an external interrupt source + * + * Externally called, irq is an offseted system-wide interrupt number + */ +static void openpic2_enable_irq(u_int irq) +{ + unsigned int loops = 100000; + + openpic2_clearfield(&GET_ISU(irq - open_pic2_irq_offset).Vector_Priority, OPENPIC_MASK); + /* make sure mask gets to controller before we return to user */ + do { + if (!loops--) { + printk(KERN_ERR "openpic_enable_irq timeout\n"); + break; + } + + mb(); /* sync is probably useless here */ + } while(openpic2_readfield(&GET_ISU(irq - open_pic2_irq_offset).Vector_Priority, + OPENPIC_MASK)); +} + +static void openpic2_disable_irq(u_int irq) +{ + u32 vp; + unsigned int loops = 100000; + + openpic2_setfield(&GET_ISU(irq - open_pic2_irq_offset).Vector_Priority, + OPENPIC_MASK); + /* make sure mask gets to controller before we return to user */ + do { + if (!loops--) { + printk(KERN_ERR "openpic_disable_irq timeout\n"); + break; + } + + mb(); /* sync is probably useless here */ + vp = openpic2_readfield(&GET_ISU(irq - open_pic2_irq_offset).Vector_Priority, + OPENPIC_MASK | OPENPIC_ACTIVITY); + } while((vp & OPENPIC_ACTIVITY) && !(vp & OPENPIC_MASK)); +} + +/* + * Initialize an interrupt source (and disable it!) + * + * irq: OpenPIC interrupt number + * pri: interrupt source priority + * vec: the vector it will produce + * pol: polarity (1 for positive, 0 for negative) + * sense: 1 for level, 0 for edge + */ +static void openpic2_initirq(u_int irq, u_int pri, u_int vec, int pol, int sense) +{ + openpic2_safe_writefield(&GET_ISU(irq).Vector_Priority, + OPENPIC_PRIORITY_MASK | OPENPIC_VECTOR_MASK | + OPENPIC_SENSE_MASK | OPENPIC_POLARITY_MASK, + (pri << OPENPIC_PRIORITY_SHIFT) | vec | + (pol ? OPENPIC_POLARITY_POSITIVE : + OPENPIC_POLARITY_NEGATIVE) | + (sense ? OPENPIC_SENSE_LEVEL : OPENPIC_SENSE_EDGE)); +} + +/* + * Map an interrupt source to one or more CPUs + */ +static void openpic2_mapirq(u_int irq, u_int physmask) +{ + openpic2_write(&GET_ISU(irq).Destination, physmask); +} + +/* + * Set the sense for an interrupt source (and disable it!) + * + * sense: 1 for level, 0 for edge + */ +static inline void openpic2_set_sense(u_int irq, int sense) +{ + openpic2_safe_writefield(&GET_ISU(irq).Vector_Priority, + OPENPIC_SENSE_LEVEL, + (sense ? OPENPIC_SENSE_LEVEL : 0)); +} + +static void openpic2_end_irq(unsigned int irq_nr) +{ + openpic2_eoi(); +} + +int openpic2_get_irq(struct pt_regs *regs) +{ + int irq = openpic2_irq(); + + if (irq == openpic2_vec_spurious) + return -1; + return irq + open_pic2_irq_offset; +} + +struct hw_interrupt_type open_pic2 = { + " OpenPIC2 ", + NULL, + NULL, + openpic2_enable_irq, + openpic2_disable_irq, + NULL, + openpic2_end_irq, +}; + +void __init openpic2_init(int offset) +{ + u_int t, i; + const char *version; + + if (!OpenPIC2_Addr) { + printk(KERN_INFO "No OpenPIC2 found !\n"); + return; + } + OpenPIC2 = (volatile struct OpenPIC *)OpenPIC2_Addr; + + ppc64_boot_msg(0x20, "OpenPic U3 Init"); + + t = openpic2_read(&OpenPIC2->Global.Feature_Reporting0); + switch (t & OPENPIC_FEATURE_VERSION_MASK) { + case 1: + version = "1.0"; + break; + case 2: + version = "1.2"; + break; + case 3: + version = "1.3"; + break; + default: + version = "?"; + break; + } + printk(KERN_INFO "OpenPIC (U3) Version %s\n", version); + + open_pic2_irq_offset = offset; + + for (i=0; i<128; i+=0x10) { + ISU2[i>>4] = &((struct OpenPIC *)OpenPIC2_Addr)->Source[i]; + NumISUs++; + } + NumSources = NumISUs * 0x10; + openpic2_vec_spurious = NumSources; + + openpic2_set_priority(0xf); + + /* Init all external sources */ + for (i = 0; i < NumSources; i++) { + int pri, sense; + + /* the bootloader may have left it enabled (bad !) */ + openpic2_disable_irq(i+offset); + + pri = 8; + sense = (i < OpenPIC_NumInitSenses) ? OpenPIC_InitSenses[i]: 1; + if (sense) + irq_desc[i+offset].status = IRQ_LEVEL; + + /* Enabled, Priority 8 or 9 */ + openpic2_initirq(i, pri, i, !sense, sense); + /* Processor 0 */ + openpic2_mapirq(i, 0x1); + } + + /* Init descriptors */ + for (i = offset; i < NumSources + offset; i++) + irq_desc[i].handler = &open_pic2; + + /* Initialize the spurious interrupt */ + openpic2_set_spurious(openpic2_vec_spurious); + + openpic2_set_priority(0); + openpic2_disable_8259_pass_through(); + + ppc64_boot_msg(0x25, "OpenPic2 Done"); +} --- linux-2.6.3-rc2/arch/ppc64/kernel/pci.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/pci.c 2004-02-12 00:39:25.000000000 -0800 @@ -34,6 +34,7 @@ #include #include #include +#include #include "pci.h" @@ -58,21 +59,32 @@ void pcibios_name_device(struct pci_dev* void pcibios_final_fixup(void); static void fixup_broken_pcnet32(struct pci_dev* dev); static void fixup_windbond_82c105(struct pci_dev* dev); +extern void fixup_k2_sata(struct pci_dev* dev); void iSeries_pcibios_init(void); struct pci_controller *hose_head; struct pci_controller **hose_tail = &hose_head; +struct pci_dma_ops pci_dma_ops; +EXPORT_SYMBOL(pci_dma_ops); + int global_phb_number; /* Global phb counter */ /* Cached ISA bridge dev. */ struct pci_dev *ppc64_isabridge_dev = NULL; struct pci_fixup pcibios_fixups[] = { - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32 }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, fixup_windbond_82c105 }, - { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, + fixup_broken_pcnet32 }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, + fixup_windbond_82c105 }, + { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, + pcibios_name_device }, +#ifdef CONFIG_PPC_PMAC + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SERVERWORKS, 0x0240, + fixup_k2_sata }, +#endif { 0 } }; @@ -250,6 +262,9 @@ pci_alloc_pci_controller(enum phb_types case phb_type_winnipeg: model = "PHB WP"; break; + case phb_type_apple: + model = "PHB APPLE"; + break; default: model = "PHB UK"; break; @@ -332,8 +347,9 @@ static int __init pcibios_init(void) pci_assign_unassigned_resources(); #endif - /* Call machine dependent fixup */ - pcibios_final_fixup(); + /* Call machine dependent final fixup */ + if (ppc_md.pcibios_fixup) + ppc_md.pcibios_fixup(); /* Cache the location of the ISA bridge (if we have one) */ ppc64_isabridge_dev = pci_find_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); @@ -540,3 +556,25 @@ int pci_mmap_page_range(struct pci_dev * return ret; } + +#ifdef CONFIG_PPC_PSERIES +static ssize_t pci_show_devspec(struct device *dev, char *buf) +{ + struct pci_dev *pdev; + struct device_node *np; + + pdev = to_pci_dev (dev); + np = pci_device_to_OF_node(pdev); + if (np == NULL || np->full_name == NULL) + return 0; + return sprintf(buf, "%s", np->full_name); +} +static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); +#endif /* CONFIG_PPC_PSERIES */ + +void pcibios_add_platform_entries(struct pci_dev *pdev) +{ +#ifdef CONFIG_PPC_PSERIES + device_create_file(&pdev->dev, &dev_attr_devspec); +#endif /* CONFIG_PPC_PSERIES */ +} --- linux-2.6.3-rc2/arch/ppc64/kernel/pci_dma.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/pci_dma.c 2004-02-12 00:39:25.000000000 -0800 @@ -1002,7 +1002,7 @@ static void getTceTableParmsPSeriesLP(st * Returns the virtual address of the buffer and sets dma_handle * to the dma address (tce) of the first page. */ -void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, +static void *tce_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) { struct TceTable * tbl; @@ -1055,7 +1055,7 @@ void *pci_alloc_consistent(struct pci_de return ret; } -void pci_free_consistent(struct pci_dev *hwdev, size_t size, +static void tce_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) { struct TceTable * tbl; @@ -1089,7 +1089,7 @@ void pci_free_consistent(struct pci_dev * need not be page aligned, the dma_addr_t returned will point to the same * byte within the page as vaddr. */ -dma_addr_t pci_map_single(struct pci_dev *hwdev, void *vaddr, +static dma_addr_t tce_map_single(struct pci_dev *hwdev, void *vaddr, size_t size, int direction ) { struct TceTable * tbl; @@ -1124,7 +1124,7 @@ dma_addr_t pci_map_single(struct pci_dev return dma_handle; } -void pci_unmap_single( struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction ) +static void tce_unmap_single( struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction ) { struct TceTable * tbl; unsigned order, nPages; @@ -1354,7 +1354,7 @@ static dma_addr_t create_tces_sg( struct return dmaAddr; } -int pci_map_sg( struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction ) +static int tce_map_sg( struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction ) { struct TceTable * tbl; unsigned numTces; @@ -1389,7 +1389,7 @@ int pci_map_sg( struct pci_dev *hwdev, s return num_dma; } -void pci_unmap_sg( struct pci_dev *hwdev, struct scatterlist *sg, int nelms, int direction ) +static void tce_unmap_sg( struct pci_dev *hwdev, struct scatterlist *sg, int nelms, int direction ) { struct TceTable * tbl; unsigned order, numTces, i; @@ -1430,7 +1430,7 @@ void pci_unmap_sg( struct pci_dev *hwdev } #else -int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, +static int tce_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) { int i; @@ -1448,7 +1448,7 @@ int pci_map_sg(struct pci_dev *pdev, str return nelems; } -void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, +static void tce_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) { while (nelems--) { @@ -1465,7 +1465,15 @@ void tce_init_pSeries(void) { ppc_md.tce_build = tce_build_pSeries; ppc_md.tce_free_one = tce_free_one_pSeries; + + pci_dma_ops.pci_alloc_consistent = tce_alloc_consistent; + pci_dma_ops.pci_free_consistent = tce_free_consistent; + pci_dma_ops.pci_map_single = tce_map_single; + pci_dma_ops.pci_unmap_single = tce_unmap_single; + pci_dma_ops.pci_map_sg = tce_map_sg; + pci_dma_ops.pci_unmap_sg = tce_unmap_sg; } + #endif #ifdef CONFIG_PPC_ISERIES @@ -1473,5 +1481,12 @@ void tce_init_iSeries(void) { ppc_md.tce_build = tce_build_iSeries; ppc_md.tce_free_one = tce_free_one_iSeries; + + pci_dma_ops.pci_alloc_consistent = tce_alloc_consistent; + pci_dma_ops.pci_free_consistent = tce_free_consistent; + pci_dma_ops.pci_map_single = tce_map_single; + pci_dma_ops.pci_unmap_single = tce_unmap_single; + pci_dma_ops.pci_map_sg = tce_map_sg; + pci_dma_ops.pci_unmap_sg = tce_unmap_sg; } #endif --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/pci_dma_direct.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,88 @@ +/* + * Support for DMA from PCI devices to main memory on + * machines without an iommu or with directly addressable + * RAM (typically a pmac with 2Gb of RAM or less) + * + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +static void *pci_direct_alloc_consistent(struct pci_dev *hwdev, size_t size, + dma_addr_t *dma_handle) +{ + void *ret; + + ret = (void *)__get_free_pages(GFP_ATOMIC, get_order(size)); + if (ret != NULL) { + memset(ret, 0, size); + *dma_handle = virt_to_absolute((unsigned long)ret); + } + return ret; +} + +static void pci_direct_free_consistent(struct pci_dev *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + free_pages((unsigned long)vaddr, get_order(size)); +} + +static dma_addr_t pci_direct_map_single(struct pci_dev *hwdev, void *ptr, + size_t size, int direction) +{ + return virt_to_absolute((unsigned long)ptr); +} + +static void pci_direct_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, + size_t size, int direction) +{ +} + +static int pci_direct_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction) +{ + int i; + + for (i = 0; i < nents; i++, sg++) { + sg->dma_address = page_to_phys(sg->page) + sg->offset; + sg->dma_length = sg->length; + } + + return nents; +} + +static void pci_direct_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction) +{ +} + +void __init pci_dma_init_direct(void) +{ + pci_dma_ops.pci_alloc_consistent = pci_direct_alloc_consistent; + pci_dma_ops.pci_free_consistent = pci_direct_free_consistent; + pci_dma_ops.pci_map_single = pci_direct_map_single; + pci_dma_ops.pci_unmap_single = pci_direct_unmap_single; + pci_dma_ops.pci_map_sg = pci_direct_map_sg; + pci_dma_ops.pci_unmap_sg = pci_direct_unmap_sg; +} --- linux-2.6.3-rc2/arch/ppc64/kernel/pci_dn.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/pci_dn.c 2004-02-12 00:39:25.000000000 -0800 @@ -181,6 +181,7 @@ struct device_node *fetch_dev_dn(struct } return dn; } +EXPORT_SYMBOL(fetch_dev_dn); /****************************************************************** --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/pmac_feature.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,654 @@ +/* + * arch/ppc/platforms/pmac_feature.c + * + * Copyright (C) 1996-2001 Paul Mackerras (paulus@cs.anu.edu.au) + * Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TODO: + * + * - Replace mdelay with some schedule loop if possible + * - Shorten some obfuscated delays on some routines (like modem + * power) + * - Refcount some clocks (see darwin) + * - Split split split... + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG_FEATURE + +#ifdef DEBUG_FEATURE +#define DBG(fmt,...) printk(KERN_DEBUG fmt) +#else +#define DBG(fmt,...) +#endif + +/* + * We use a single global lock to protect accesses. Each driver has + * to take care of its own locking + */ +static spinlock_t feature_lock __pmacdata = SPIN_LOCK_UNLOCKED; + +#define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); +#define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); + + +/* + * Instance of some macio stuffs + */ +struct macio_chip macio_chips[MAX_MACIO_CHIPS] __pmacdata; + +struct macio_chip* __pmac +macio_find(struct device_node* child, int type) +{ + while(child) { + int i; + + for (i=0; i < MAX_MACIO_CHIPS && macio_chips[i].of_node; i++) + if (child == macio_chips[i].of_node && + (!type || macio_chips[i].type == type)) + return &macio_chips[i]; + child = child->parent; + } + return NULL; +} + +static const char* macio_names[] __pmacdata = +{ + "Unknown", + "Grand Central", + "OHare", + "OHareII", + "Heathrow", + "Gatwick", + "Paddington", + "Keylargo", + "Pangea", + "Intrepid", + "K2" +}; + + + +/* + * Uninorth reg. access. Note that Uni-N regs are big endian + */ + +#define UN_REG(r) (uninorth_base + ((r) >> 2)) +#define UN_IN(r) (in_be32(UN_REG(r))) +#define UN_OUT(r,v) (out_be32(UN_REG(r), (v))) +#define UN_BIS(r,v) (UN_OUT((r), UN_IN(r) | (v))) +#define UN_BIC(r,v) (UN_OUT((r), UN_IN(r) & ~(v))) + +static struct device_node* uninorth_node __pmacdata; +static u32* uninorth_base __pmacdata; +static u32 uninorth_rev __pmacdata; +static void *u3_ht; + +extern struct pci_dev *k2_skiplist[2]; + +/* + * For each motherboard family, we have a table of functions pointers + * that handle the various features. + */ + +typedef long (*feature_call)(struct device_node* node, long param, long value); + +struct feature_table_entry { + unsigned int selector; + feature_call function; +}; + +struct pmac_mb_def +{ + const char* model_string; + const char* model_name; + int model_id; + struct feature_table_entry* features; + unsigned long board_flags; +}; +static struct pmac_mb_def pmac_mb __pmacdata; + +/* + * Here are the chip specific feature functions + */ + + +static long __pmac g5_read_gpio(struct device_node* node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + + return MACIO_IN8(param); +} + + +static long __pmac g5_write_gpio(struct device_node* node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + + MACIO_OUT8(param, (u8)(value & 0xff)); + return 0; +} + +static long __pmac g5_gmac_enable(struct device_node* node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + unsigned long flags; + struct pci_dev *pdev = NULL; + + if (node == NULL) + return -ENODEV; + + /* XXX FIXME: We should fix pci_device_from_OF_node here, and + * get to a real pci_dev or we'll get into trouble with PCI + * domains the day we get overlapping numbers (like if we ever + * decide to show the HT root. + * Note that we only get the slot when value is 0. This is called + * early during boot with value 1 to enable all devices, at which + * point, we don't yet have probed pci_find_slot, so it would fail + * to look for the slot at this point. + */ + if (!value) + pdev = pci_find_slot(node->busno, node->devfn); + + LOCK(flags); + if (value) { + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE); + mb(); + k2_skiplist[0] = NULL; + } else { + k2_skiplist[0] = pdev; + mb(); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE); + } + + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long __pmac g5_fw_enable(struct device_node* node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + unsigned long flags; + struct pci_dev *pdev = NULL; + + /* XXX FIXME: We should fix pci_device_from_OF_node here, and + * get to a real pci_dev or we'll get into trouble with PCI + * domains the day we get overlapping numbers (like if we ever + * decide to show the HT root + * Note that we only get the slot when value is 0. This is called + * early during boot with value 1 to enable all devices, at which + * point, we don't yet have probed pci_find_slot, so it would fail + * to look for the slot at this point. + */ + if (node == NULL) + return -ENODEV; + + if (!value) + pdev = pci_find_slot(node->busno, node->devfn); + + LOCK(flags); + if (value) { + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE); + mb(); + k2_skiplist[1] = NULL; + } else { + k2_skiplist[0] = pdev; + mb(); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE); + } + + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long __pmac g5_mpic_enable(struct device_node* node, long param, long value) +{ + unsigned long flags; + + if (node->parent == NULL || strcmp(node->parent->name, "u3")) + return 0; + + LOCK(flags); + UN_BIS(U3_TOGGLE_REG, U3_MPIC_RESET | U3_MPIC_OUTPUT_ENABLE); + UNLOCK(flags); + + return 0; +} + +#ifdef CONFIG_SMP +static long __pmac g5_reset_cpu(struct device_node* node, long param, long value) +{ + unsigned int reset_io = 0; + unsigned long flags; + struct macio_chip* macio; + struct device_node* np; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo2) + return -ENODEV; + + np = find_path_device("/cpus"); + if (np == NULL) + return -ENODEV; + for (np = np->child; np != NULL; np = np->sibling) { + u32* num = (u32 *)get_property(np, "reg", NULL); + u32* rst = (u32 *)get_property(np, "soft-reset", NULL); + if (num == NULL || rst == NULL) + continue; + if (param == *num) { + reset_io = *rst; + break; + } + } + if (np == NULL || reset_io == 0) + return -ENODEV; + + LOCK(flags); + MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(reset_io); + udelay(1); + MACIO_OUT8(reset_io, 0); + (void)MACIO_IN8(reset_io); + UNLOCK(flags); + + return 0; +} +#endif /* CONFIG_SMP */ + +/* + * This can be called from pmac_smp so isn't static + * + * This takes the second CPU off the bus on dual CPU machines + * running UP + */ +void __pmac g5_phy_disable_cpu1(void) +{ + UN_OUT(U3_API_PHY_CONFIG_1, 0); +} + +static long __pmac generic_get_mb_info(struct device_node* node, long param, long value) +{ + switch(param) { + case PMAC_MB_INFO_MODEL: + return pmac_mb.model_id; + case PMAC_MB_INFO_FLAGS: + return pmac_mb.board_flags; + case PMAC_MB_INFO_NAME: + /* hack hack hack... but should work */ + *((const char **)value) = pmac_mb.model_name; + return 0; + } + return -EINVAL; +} + + +/* + * Table definitions + */ + +/* Used on any machine + */ +static struct feature_table_entry any_features[] __pmacdata = { + { PMAC_FTR_GET_MB_INFO, generic_get_mb_info }, + { 0, NULL } +}; + +/* G5 features + */ +static struct feature_table_entry g5_features[] __pmacdata = { + { PMAC_FTR_GMAC_ENABLE, g5_gmac_enable }, + { PMAC_FTR_1394_ENABLE, g5_fw_enable }, + { PMAC_FTR_ENABLE_MPIC, g5_mpic_enable }, + { PMAC_FTR_READ_GPIO, g5_read_gpio }, + { PMAC_FTR_WRITE_GPIO, g5_write_gpio }, +#ifdef CONFIG_SMP + { PMAC_FTR_RESET_CPU, g5_reset_cpu }, +#endif /* CONFIG_SMP */ + { 0, NULL } +}; + +static struct pmac_mb_def pmac_mb_defs[] __pmacdata = { + { "PowerMac7,2", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5, g5_features, + 0, + }, +}; + +/* + * The toplevel feature_call callback + */ +long __pmac pmac_do_feature_call(unsigned int selector, ...) +{ + struct device_node* node; + long param, value; + int i; + feature_call func = NULL; + va_list args; + + if (pmac_mb.features) + for (i=0; pmac_mb.features[i].function; i++) + if (pmac_mb.features[i].selector == selector) { + func = pmac_mb.features[i].function; + break; + } + if (!func) + for (i=0; any_features[i].function; i++) + if (any_features[i].selector == selector) { + func = any_features[i].function; + break; + } + if (!func) + return -ENODEV; + + va_start(args, selector); + node = (struct device_node*)va_arg(args, void*); + param = va_arg(args, long); + value = va_arg(args, long); + va_end(args); + + return func(node, param, value); +} + +static int __init probe_motherboard(void) +{ + int i; + struct macio_chip* macio = &macio_chips[0]; + const char* model = NULL; + struct device_node *dt; + + /* Lookup known motherboard type in device-tree. First try an + * exact match on the "model" property, then try a "compatible" + * match is none is found. + */ + dt = find_devices("device-tree"); + if (dt != NULL) + model = (const char *) get_property(dt, "model", NULL); + for(i=0; model && i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) { + if (strcmp(model, pmac_mb_defs[i].model_string) == 0) { + pmac_mb = pmac_mb_defs[i]; + goto found; + } + } + for(i=0; i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) { + if (machine_is_compatible(pmac_mb_defs[i].model_string)) { + pmac_mb = pmac_mb_defs[i]; + goto found; + } + } + + /* Fallback to selection depending on mac-io chip type */ + switch(macio->type) { + case macio_keylargo2: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2; + pmac_mb.model_name = "Unknown K2-based"; + pmac_mb.features = g5_features; + + default: + return -ENODEV; + } +found: + /* Check for "mobile" machine */ + if (model && (strncmp(model, "PowerBook", 9) == 0 + || strncmp(model, "iBook", 5) == 0)) + pmac_mb.board_flags |= PMAC_MB_MOBILE; + + + printk(KERN_INFO "PowerMac motherboard: %s\n", pmac_mb.model_name); + return 0; +} + +/* Initialize the Core99 UniNorth host bridge and memory controller + */ +static void __init probe_uninorth(void) +{ + uninorth_node = of_find_node_by_name(NULL, "u3"); + if (uninorth_node && uninorth_node->n_addrs > 0) { + /* Small hack until I figure out if parsing in prom.c is correct. I should + * get rid of those pre-parsed junk anyway + */ + unsigned long address = uninorth_node->addrs[0].address; + uninorth_base = ioremap(address, 0x40000); + uninorth_rev = in_be32(UN_REG(UNI_N_VERSION)); + u3_ht = ioremap(address + U3_HT_CONFIG_BASE, 0x1000); + } else + uninorth_node = NULL; + + if (!uninorth_node) + return; + + printk(KERN_INFO "Found U3 memory controller & host bridge, revision: %d\n", + uninorth_rev); + printk(KERN_INFO "Mapped at 0x%08lx\n", (unsigned long)uninorth_base); + +} + +static void __init probe_one_macio(const char* name, const char* compat, int type) +{ + struct device_node* node; + int i; + volatile u32* base; + u32* revp; + + node = find_devices(name); + if (!node || !node->n_addrs) + return; + if (compat) + do { + if (device_is_compatible(node, compat)) + break; + node = node->next; + } while (node); + if (!node) + return; + for(i=0; i= MAX_MACIO_CHIPS) { + printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n"); + printk(KERN_ERR "pmac_feature: %s skipped\n", node->full_name); + return; + } + base = (volatile u32*)ioremap(node->addrs[0].address, node->addrs[0].size); + if (!base) { + printk(KERN_ERR "pmac_feature: Can't map mac-io chip !\n"); + return; + } + if (type == macio_keylargo) { + u32* did = (u32 *)get_property(node, "device-id", NULL); + if (*did == 0x00000025) + type = macio_pangea; + if (*did == 0x0000003e) + type = macio_intrepid; + } + macio_chips[i].of_node = node; + macio_chips[i].type = type; + macio_chips[i].base = base; + macio_chips[i].flags = MACIO_FLAG_SCCB_ON | MACIO_FLAG_SCCB_ON; + macio_chips[i].name = macio_names[type]; + revp = (u32 *)get_property(node, "revision-id", NULL); + if (revp) + macio_chips[i].rev = *revp; + printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n", + macio_names[type], macio_chips[i].rev, macio_chips[i].base); +} + +static int __init +probe_macios(void) +{ + probe_one_macio("mac-io", "K2-Keylargo", macio_keylargo2); + + macio_chips[0].lbus.index = 0; + macio_chips[1].lbus.index = 1; + + return (macio_chips[0].of_node == NULL) ? -ENODEV : 0; +} + +static void __init +set_initial_features(void) +{ + struct device_node *np; + + if (macio_chips[0].type == macio_keylargo2) { +#ifndef CONFIG_SMP + /* On SMP machines running UP, we have the second CPU eating + * bus cycles. We need to take it off the bus. This is done + * from pmac_smp for SMP kernels running on one CPU + */ + np = of_find_node_by_type(NULL, "cpu"); + if (np != NULL) + np = of_find_node_by_type(np, "cpu"); + if (np != NULL) { + g5_phy_disable_cpu1(); + of_node_put(np); + } +#endif /* CONFIG_SMP */ + /* Enable GMAC for now for PCI probing. It will be disabled + * later on after PCI probe + */ + np = of_find_node_by_name(NULL, "ethernet"); + while(np) { + if (device_is_compatible(np, "K2-GMAC")) + g5_gmac_enable(np, 0, 1); + np = of_find_node_by_name(np, "ethernet"); + } + + /* Enable FW before PCI probe. Will be disabled later on + * Note: We should have a batter way to check that we are + * dealing with uninorth internal cell and not a PCI cell + * on the external PCI. The code below works though. + */ + np = of_find_node_by_name(NULL, "firewire"); + while(np) { + if (device_is_compatible(np, "pci106b,5811")) { + macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; + g5_fw_enable(np, 0, 1); + } + np = of_find_node_by_name(np, "firewire"); + } + } +} + +void __init +pmac_feature_init(void) +{ + /* Detect the UniNorth memory controller */ + probe_uninorth(); + + /* Probe mac-io controllers */ + if (probe_macios()) { + printk(KERN_WARNING "No mac-io chip found\n"); + return; + } + + /* Setup low-level i2c stuffs */ + pmac_init_low_i2c(); + + /* Probe machine type */ + if (probe_motherboard()) + printk(KERN_WARNING "Unknown PowerMac !\n"); + + /* Set some initial features (turn off some chips that will + * be later turned on) + */ + set_initial_features(); +} + +int __init pmac_feature_late_init(void) +{ +#if 0 + struct device_node* np; + + /* Request some resources late */ + if (uninorth_node) + request_OF_resource(uninorth_node, 0, NULL); + np = find_devices("hammerhead"); + if (np) + request_OF_resource(np, 0, NULL); + np = find_devices("interrupt-controller"); + if (np) + request_OF_resource(np, 0, NULL); +#endif + return 0; +} + +device_initcall(pmac_feature_late_init); + + +static void dump_HT_speeds(char *name, u32 cfg, u32 frq) +{ + int freqs[16] = { 200,300,400,500,600,800,1000,0,0,0,0,0,0,0,0,0 }; + int bits[8] = { 8,16,0,32,2,4,0,0 }; + int freq = (frq >> 8) & 0xf; + + if (freqs[freq] == 0) + printk("%s: Unknown HT link frequency %x\n", name, freq); + else + printk("%s: %d MHz on main link, (%d in / %d out) bits width\n", + name, freqs[freq], + bits[(cfg >> 28) & 0x7], bits[(cfg >> 24) & 0x7]); +} + +void __init pmac_check_ht_link(void) +{ +#if 0 /* Disabled for now */ + u32 ufreq, freq, ucfg, cfg; + struct device_node *pcix_node; + u8 px_bus, px_devfn; + struct pci_controller *px_hose; + + (void)in_be32(u3_ht + U3_HT_LINK_COMMAND); + ucfg = cfg = in_be32(u3_ht + U3_HT_LINK_CONFIG); + ufreq = freq = in_be32(u3_ht + U3_HT_LINK_FREQ); + dump_HT_speeds("U3 HyperTransport", cfg, freq); + + pcix_node = of_find_compatible_node(NULL, "pci", "pci-x"); + if (pcix_node == NULL) { + printk("No PCI-X bridge found\n"); + return; + } + px_hose = pcix_node->phb; + px_bus = pcix_node->busno; + px_devfn = pcix_node->devfn; + + early_read_config_dword(px_hose, px_bus, px_devfn, 0xc4, &cfg); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xcc, &freq); + dump_HT_speeds("PCI-X HT Uplink", cfg, freq); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xc8, &cfg); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xd0, &freq); + dump_HT_speeds("PCI-X HT Downlink", cfg, freq); +#endif +} --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/pmac.h 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,32 @@ +#ifndef __PMAC_H__ +#define __PMAC_H__ + +#include +#include + +/* + * Declaration for the various functions exported by the + * pmac_* files. Mostly for use by pmac_setup + */ + +extern void pmac_get_boot_time(struct rtc_time *tm); +extern void pmac_get_rtc_time(struct rtc_time *tm); +extern int pmac_set_rtc_time(struct rtc_time *tm); +extern void pmac_read_rtc_time(void); +extern void pmac_calibrate_decr(void); + +extern void pmac_pcibios_fixup(void); +extern void pmac_pci_init(void); +extern void pmac_setup_pci_dma(void); +extern void fixup_k2_sata(struct pci_dev* dev); +extern void pmac_check_ht_link(void); + +extern void pmac_setup_smp(void); + +extern unsigned long pmac_ide_get_base(int index); +extern void pmac_ide_init_hwif_ports(hw_regs_t *hw, + unsigned long data_port, unsigned long ctrl_port, int *irq); + +extern void pmac_nvram_init(void); + +#endif /* __PMAC_H__ */ --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/pmac_low_i2c.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,513 @@ +/* + * arch/ppc/platforms/pmac_low_i2c.c + * + * Copyright (C) 2003 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This file contains some low-level i2c access routines that + * need to be used by various bits of the PowerMac platform code + * at times where the real asynchronous & interrupt driven driver + * cannot be used. The API borrows some semantics from the darwin + * driver in order to ease the implementation of the platform + * properties parser + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_LOW_I2C_HOST 4 + +#if 1 +#define DBG(x...) do {\ + printk(KERN_DEBUG "KW:" x); \ + } while(0) +#else +#define DBGG(x...) +#endif + +struct low_i2c_host; + +typedef int (*low_i2c_func_t)(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len); + +struct low_i2c_host +{ + struct device_node *np; /* OF device node */ + struct semaphore mutex; /* Access mutex for use by i2c-keywest */ + low_i2c_func_t func; /* Access function */ + int is_open : 1; /* Poor man's access control */ + int mode; /* Current mode */ + int channel; /* Current channel */ + int num_channels; /* Number of channels */ + unsigned long base; /* For keywest-i2c, base address */ + int bsteps; /* And register stepping */ + int speed; /* And speed */ +}; + +static struct low_i2c_host low_i2c_hosts[MAX_LOW_I2C_HOST]; + +/* No locking is necessary on allocation, we are running way before + * anything can race with us + */ +static struct low_i2c_host *find_low_i2c_host(struct device_node *np) +{ + int i; + + for (i = 0; i < MAX_LOW_I2C_HOST; i++) + if (low_i2c_hosts[i].np == np) + return &low_i2c_hosts[i]; + return NULL; +} + +/* + * + * i2c-keywest implementation (UniNorth, U2, U3, Keylargo's) + * + */ + +/* + * Keywest i2c definitions borrowed from drivers/i2c/i2c-keywest.h, + * should be moved somewhere in include/asm-ppc/ + */ +/* Register indices */ +typedef enum { + reg_mode = 0, + reg_control, + reg_status, + reg_isr, + reg_ier, + reg_addr, + reg_subaddr, + reg_data +} reg_t; + + +/* Mode register */ +#define KW_I2C_MODE_100KHZ 0x00 +#define KW_I2C_MODE_50KHZ 0x01 +#define KW_I2C_MODE_25KHZ 0x02 +#define KW_I2C_MODE_DUMB 0x00 +#define KW_I2C_MODE_STANDARD 0x04 +#define KW_I2C_MODE_STANDARDSUB 0x08 +#define KW_I2C_MODE_COMBINED 0x0C +#define KW_I2C_MODE_MODE_MASK 0x0C +#define KW_I2C_MODE_CHAN_MASK 0xF0 + +/* Control register */ +#define KW_I2C_CTL_AAK 0x01 +#define KW_I2C_CTL_XADDR 0x02 +#define KW_I2C_CTL_STOP 0x04 +#define KW_I2C_CTL_START 0x08 + +/* Status register */ +#define KW_I2C_STAT_BUSY 0x01 +#define KW_I2C_STAT_LAST_AAK 0x02 +#define KW_I2C_STAT_LAST_RW 0x04 +#define KW_I2C_STAT_SDA 0x08 +#define KW_I2C_STAT_SCL 0x10 + +/* IER & ISR registers */ +#define KW_I2C_IRQ_DATA 0x01 +#define KW_I2C_IRQ_ADDR 0x02 +#define KW_I2C_IRQ_STOP 0x04 +#define KW_I2C_IRQ_START 0x08 +#define KW_I2C_IRQ_MASK 0x0F + +/* State machine states */ +enum { + state_idle, + state_addr, + state_read, + state_write, + state_stop, + state_dead +}; + +#define WRONG_STATE(name) do {\ + printk(KERN_DEBUG "KW: wrong state. Got %s, state: %s (isr: %02x)\n", \ + name, __kw_state_names[state], isr); \ + } while(0) + +static const char *__kw_state_names[] = { + "state_idle", + "state_addr", + "state_read", + "state_write", + "state_stop", + "state_dead" +}; + +static inline u8 __kw_read_reg(struct low_i2c_host *host, reg_t reg) +{ + return in_8(((volatile u8 *)host->base) + + (((unsigned)reg) << host->bsteps)); +} + +static inline void __kw_write_reg(struct low_i2c_host *host, reg_t reg, u8 val) +{ + out_8(((volatile u8 *)host->base) + + (((unsigned)reg) << host->bsteps), val); + (void)__kw_read_reg(host, reg_subaddr); +} + +#define kw_write_reg(reg, val) __kw_write_reg(host, reg, val) +#define kw_read_reg(reg) __kw_read_reg(host, reg) + + +/* Don't schedule, the g5 fan controller is too + * timing sensitive + */ +static u8 kw_wait_interrupt(struct low_i2c_host* host) +{ + int i; + u8 isr; + + for (i = 0; i < 200000; i++) { + isr = kw_read_reg(reg_isr) & KW_I2C_IRQ_MASK; + if (isr != 0) + return isr; + udelay(1); + } + return isr; +} + +static int kw_handle_interrupt(struct low_i2c_host *host, int state, int rw, int *rc, u8 **data, int *len, u8 isr) +{ + u8 ack; + + if (isr == 0) { + if (state != state_stop) { + DBG("KW: Timeout !\n"); + *rc = -EIO; + goto stop; + } + if (state == state_stop) { + ack = kw_read_reg(reg_status); + if (!(ack & KW_I2C_STAT_BUSY)) { + state = state_idle; + kw_write_reg(reg_ier, 0x00); + } + } + return state; + } + + if (isr & KW_I2C_IRQ_ADDR) { + ack = kw_read_reg(reg_status); + if (state != state_addr) { + kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR); + WRONG_STATE("KW_I2C_IRQ_ADDR"); + *rc = -EIO; + goto stop; + } + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + *rc = -ENODEV; + DBG("KW: NAK on address\n"); + return state_stop; + } else { + if (rw) { + state = state_read; + if (*len > 1) + kw_write_reg(reg_control, KW_I2C_CTL_AAK); + } else { + state = state_write; + kw_write_reg(reg_data, **data); + (*data)++; (*len)--; + } + } + kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR); + } + + if (isr & KW_I2C_IRQ_DATA) { + if (state == state_read) { + **data = kw_read_reg(reg_data); + (*data)++; (*len)--; + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + if ((*len) == 0) + state = state_stop; + else if ((*len) == 1) + kw_write_reg(reg_control, 0); + } else if (state == state_write) { + ack = kw_read_reg(reg_status); + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + DBG("KW: nack on data write\n"); + *rc = -EIO; + goto stop; + } else if (*len) { + kw_write_reg(reg_data, **data); + (*data)++; (*len)--; + } else { + kw_write_reg(reg_control, KW_I2C_CTL_STOP); + state = state_stop; + *rc = 0; + } + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + } else { + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + WRONG_STATE("KW_I2C_IRQ_DATA"); + if (state != state_stop) { + *rc = -EIO; + goto stop; + } + } + } + + if (isr & KW_I2C_IRQ_STOP) { + kw_write_reg(reg_isr, KW_I2C_IRQ_STOP); + if (state != state_stop) { + WRONG_STATE("KW_I2C_IRQ_STOP"); + *rc = -EIO; + } + return state_idle; + } + + if (isr & KW_I2C_IRQ_START) + kw_write_reg(reg_isr, KW_I2C_IRQ_START); + + return state; + + stop: + kw_write_reg(reg_control, KW_I2C_CTL_STOP); + return state_stop; +} + +static int keywest_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 subaddr, u8 *data, int len) +{ + u8 mode_reg = host->speed; + int state = state_addr; + int rc = 0; + + /* Setup mode & subaddress if any */ + switch(host->mode) { + case pmac_low_i2c_mode_dumb: + printk(KERN_ERR "low_i2c: Dumb mode not supported !\n"); + return -EINVAL; + case pmac_low_i2c_mode_std: + mode_reg |= KW_I2C_MODE_STANDARD; + break; + case pmac_low_i2c_mode_stdsub: + mode_reg |= KW_I2C_MODE_STANDARDSUB; + kw_write_reg(reg_subaddr, subaddr); + break; + case pmac_low_i2c_mode_combined: + mode_reg |= KW_I2C_MODE_COMBINED; + kw_write_reg(reg_subaddr, subaddr); + break; + } + + /* Setup channel & clear pending irqs */ + kw_write_reg(reg_isr, kw_read_reg(reg_isr)); + kw_write_reg(reg_mode, mode_reg | (host->channel << 4)); + kw_write_reg(reg_status, 0); + + /* Set up address and r/w bit */ + kw_write_reg(reg_addr, addr); + + /* Start sending address & disable interrupt*/ + kw_write_reg(reg_ier, 0 /*KW_I2C_IRQ_MASK*/); + kw_write_reg(reg_control, KW_I2C_CTL_XADDR); + + /* State machine, to turn into an interrupt handler */ + while(state != state_idle) { + u8 isr = kw_wait_interrupt(host); + state = kw_handle_interrupt(host, state, addr & 1, &rc, &data, &len, isr); + } + + return rc; +} + +static void keywest_low_i2c_add(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(NULL); + unsigned long *psteps, *prate, steps, aoffset = 0; + struct device_node *parent; + + if (host == NULL) { + printk(KERN_ERR "low_i2c: Can't allocate host for %s\n", + np->full_name); + return; + } + memset(host, 0, sizeof(*host)); + + init_MUTEX(&host->mutex); + host->np = of_node_get(np); + psteps = (unsigned long *)get_property(np, "AAPL,address-step", NULL); + steps = psteps ? (*psteps) : 0x10; + for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++) + steps >>= 1; + parent = of_get_parent(np); + host->num_channels = 1; + if (parent && parent->name[0] == 'u') { + host->num_channels = 2; + aoffset = 3; + } + /* Select interface rate */ + host->speed = KW_I2C_MODE_100KHZ; + prate = (unsigned long *)get_property(np, "AAPL,i2c-rate", NULL); + if (prate) switch(*prate) { + case 100: + host->speed = KW_I2C_MODE_100KHZ; + break; + case 50: + host->speed = KW_I2C_MODE_50KHZ; + break; + case 25: + host->speed = KW_I2C_MODE_25KHZ; + break; + } + host->mode = pmac_low_i2c_mode_std; + host->base = (unsigned long)ioremap(np->addrs[0].address + aoffset, + np->addrs[0].size); + host->func = keywest_low_i2c_func; +} + +/* + * + * PMU implementation + * + */ + + +#ifdef CONFIG_ADB_PMU + +static int pmu_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len) +{ + // TODO + return -ENODEV; +} + +static void pmu_low_i2c_add(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(NULL); + + if (host == NULL) { + printk(KERN_ERR "low_i2c: Can't allocate host for %s\n", + np->full_name); + return; + } + memset(host, 0, sizeof(*host)); + + init_MUTEX(&host->mutex); + host->np = of_node_get(np); + host->num_channels = 3; + host->mode = pmac_low_i2c_mode_std; + host->func = pmu_low_i2c_func; +} + +#endif /* CONFIG_ADB_PMU */ + +void __init pmac_init_low_i2c(void) +{ + struct device_node *np; + + /* Probe keywest-i2c busses */ + np = of_find_compatible_node(NULL, "i2c", "keywest-i2c"); + while(np) { + keywest_low_i2c_add(np); + np = of_find_compatible_node(np, "i2c", "keywest-i2c"); + } + +#ifdef CONFIG_ADB_PMU + /* Probe PMU busses */ + np = of_find_node_by_name(NULL, "via-pmu"); + if (np) + pmu_low_i2c_add(np); +#endif /* CONFIG_ADB_PMU */ + + /* TODO: Add CUDA support as well */ +} + +int pmac_low_i2c_lock(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + down(&host->mutex); + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_lock); + +int pmac_low_i2c_unlock(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + up(&host->mutex); + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_unlock); + + +int pmac_low_i2c_open(struct device_node *np, int channel) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + + if (channel >= host->num_channels) + return -EINVAL; + + down(&host->mutex); + host->is_open = 1; + host->channel = channel; + + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_open); + +int pmac_low_i2c_close(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + + host->is_open = 0; + up(&host->mutex); + + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_close); + +int pmac_low_i2c_setmode(struct device_node *np, int mode) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + WARN_ON(!host->is_open); + host->mode = mode; + + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_setmode); + +int pmac_low_i2c_xfer(struct device_node *np, u8 addrdir, u8 subaddr, u8 *data, int len) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + WARN_ON(!host->is_open); + + return host->func(host, addrdir, subaddr, data, len); +} +EXPORT_SYMBOL(pmac_low_i2c_xfer); + --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/pmac_nvram.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,495 @@ +/* + * arch/ppc/platforms/pmac_nvram.c + * + * Copyright (C) 2002 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Todo: - add support for the OF persistent properties + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +#define NVRAM_SIZE 0x2000 /* 8kB of non-volatile RAM */ + +#define CORE99_SIGNATURE 0x5a +#define CORE99_ADLER_START 0x14 + +/* On Core99, nvram is either a sharp, a micron or an AMD flash */ +#define SM_FLASH_STATUS_DONE 0x80 +#define SM_FLASH_STATUS_ERR 0x38 + +#define SM_FLASH_CMD_ERASE_CONFIRM 0xd0 +#define SM_FLASH_CMD_ERASE_SETUP 0x20 +#define SM_FLASH_CMD_RESET 0xff +#define SM_FLASH_CMD_WRITE_SETUP 0x40 +#define SM_FLASH_CMD_CLEAR_STATUS 0x50 +#define SM_FLASH_CMD_READ_STATUS 0x70 + +/* CHRP NVRAM header */ +struct chrp_header { + u8 signature; + u8 cksum; + u16 len; + char name[12]; + u8 data[0]; +}; + +struct core99_header { + struct chrp_header hdr; + u32 adler; + u32 generation; + u32 reserved[2]; +}; + +/* + * Read and write the non-volatile RAM on PowerMacs and CHRP machines. + */ +static volatile unsigned char *nvram_data; +static int core99_bank = 0; +// XXX Turn that into a sem +static spinlock_t nv_lock = SPIN_LOCK_UNLOCKED; + +extern int system_running; + +static int (*core99_write_bank)(int bank, u8* datas); +static int (*core99_erase_bank)(int bank); + +static char *nvram_image __pmacdata; + + +static ssize_t __pmac core99_nvram_read(char *buf, size_t count, loff_t *index) +{ + int i; + + if (nvram_image == NULL) + return -ENODEV; + if (*index > NVRAM_SIZE) + return 0; + + i = *index; + if (i + count > NVRAM_SIZE) + count = NVRAM_SIZE - i; + + memcpy(buf, &nvram_image[i], count); + *index = i + count; + return count; +} + +static ssize_t __pmac core99_nvram_write(char *buf, size_t count, loff_t *index) +{ + int i; + + if (nvram_image == NULL) + return -ENODEV; + if (*index > NVRAM_SIZE) + return 0; + + i = *index; + if (i + count > NVRAM_SIZE) + count = NVRAM_SIZE - i; + + memcpy(&nvram_image[i], buf, count); + *index = i + count; + return count; +} + +static ssize_t __pmac core99_nvram_size(void) +{ + if (nvram_image == NULL) + return -ENODEV; + return NVRAM_SIZE; +} + +static u8 __pmac chrp_checksum(struct chrp_header* hdr) +{ + u8 *ptr; + u16 sum = hdr->signature; + for (ptr = (u8 *)&hdr->len; ptr < hdr->data; ptr++) + sum += *ptr; + while (sum > 0xFF) + sum = (sum & 0xFF) + (sum>>8); + return sum; +} + +static u32 __pmac core99_calc_adler(u8 *buffer) +{ + int cnt; + u32 low, high; + + buffer += CORE99_ADLER_START; + low = 1; + high = 0; + for (cnt=0; cnt<(NVRAM_SIZE-CORE99_ADLER_START); cnt++) { + if ((cnt % 5000) == 0) { + high %= 65521UL; + high %= 65521UL; + } + low += buffer[cnt]; + high += low; + } + low %= 65521UL; + high %= 65521UL; + + return (high << 16) | low; +} + +static u32 __pmac core99_check(u8* datas) +{ + struct core99_header* hdr99 = (struct core99_header*)datas; + + if (hdr99->hdr.signature != CORE99_SIGNATURE) { + DBG("Invalid signature\n"); + return 0; + } + if (hdr99->hdr.cksum != chrp_checksum(&hdr99->hdr)) { + DBG("Invalid checksum\n"); + return 0; + } + if (hdr99->adler != core99_calc_adler(datas)) { + DBG("Invalid adler\n"); + return 0; + } + return hdr99->generation; +} + +static int __pmac sm_erase_bank(int bank) +{ + int stat, i; + unsigned long timeout; + + u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: Sharp/Micron Erasing bank %d...\n", bank); + + out_8(base, SM_FLASH_CMD_ERASE_SETUP); + out_8(base, SM_FLASH_CMD_ERASE_CONFIRM); + timeout = 0; + do { + if (++timeout > 1000000) { + printk(KERN_ERR "nvram: Sharp/Miron flash erase timeout !\n"); + break; + } + out_8(base, SM_FLASH_CMD_READ_STATUS); + stat = in_8(base); + } while (!(stat & SM_FLASH_STATUS_DONE)); + + out_8(base, SM_FLASH_CMD_CLEAR_STATUS); + out_8(base, SM_FLASH_CMD_RESET); + + for (i=0; i 1000000) { + printk(KERN_ERR "nvram: Sharp/Micron flash write timeout !\n"); + break; + } + out_8(base, SM_FLASH_CMD_READ_STATUS); + stat = in_8(base); + } while (!(stat & SM_FLASH_STATUS_DONE)); + if (!(stat & SM_FLASH_STATUS_DONE)) + break; + } + out_8(base, SM_FLASH_CMD_CLEAR_STATUS); + out_8(base, SM_FLASH_CMD_RESET); + for (i=0; i 1000000) { + printk(KERN_ERR "nvram: AMD flash erase timeout !\n"); + break; + } + stat = in_8(base) ^ in_8(base); + } while (stat != 0); + + /* Reset */ + out_8(base, 0xf0); + udelay(1); + + for (i=0; i 1000000) { + printk(KERN_ERR "nvram: AMD flash write timeout !\n"); + break; + } + stat = in_8(base) ^ in_8(base); + } while (stat != 0); + if (stat != 0) + break; + } + + /* Reset */ + out_8(base, 0xf0); + udelay(1); + + for (i=0; igeneration++; + hdr99->hdr.signature = CORE99_SIGNATURE; + hdr99->hdr.cksum = chrp_checksum(&hdr99->hdr); + hdr99->adler = core99_calc_adler(nvram_image); + core99_bank = core99_bank ? 0 : 1; + if (core99_erase_bank) + if (core99_erase_bank(core99_bank)) { + printk("nvram: Error erasing bank %d\n", core99_bank); + goto bail; + } + if (core99_write_bank) + if (core99_write_bank(core99_bank, nvram_image)) + printk("nvram: Error writing bank %d\n", core99_bank); + bail: + spin_unlock_irqrestore(&nv_lock, flags); + + return 0; +} + +int __init pmac_nvram_init(void) +{ + struct device_node *dp; + u32 gen_bank0, gen_bank1; + int i; + + dp = find_devices("nvram"); + if (dp == NULL) { + printk(KERN_ERR "Can't find NVRAM device\n"); + return -ENODEV; + } + if (!device_is_compatible(dp, "nvram,flash")) { + printk(KERN_ERR "Incompatible type of NVRAM\n"); + return -ENXIO; + } + + nvram_image = alloc_bootmem(NVRAM_SIZE); + if (nvram_image == NULL) { + printk(KERN_ERR "nvram: can't allocate ram image\n"); + return -ENOMEM; + } + nvram_data = ioremap(dp->addrs[0].address, NVRAM_SIZE*2); + + DBG("nvram: Checking bank 0...\n"); + + gen_bank0 = core99_check((u8 *)nvram_data); + gen_bank1 = core99_check((u8 *)nvram_data + NVRAM_SIZE); + core99_bank = (gen_bank0 < gen_bank1) ? 1 : 0; + + DBG("nvram: gen0=%d, gen1=%d\n", gen_bank0, gen_bank1); + DBG("nvram: Active bank is: %d\n", core99_bank); + + for (i=0; iindex; +} + +u8 __pmac pmac_xpram_read(int xpaddr) +{ + int offset = pmac_get_partition(pmac_nvram_XPRAM); + loff_t index; + u8 buf; + ssize_t count; + + if (offset < 0 || xpaddr < 0 || xpaddr > 0x100) + return 0xff; + index = offset + xpaddr; + + count = ppc_md.nvram_read(&buf, 1, &index); + if (count != 1) + return 0xff; + return buf; +} + +void __pmac pmac_xpram_write(int xpaddr, u8 data) +{ + int offset = pmac_get_partition(pmac_nvram_XPRAM); + loff_t index; + u8 buf; + + if (offset < 0 || xpaddr < 0 || xpaddr > 0x100) + return; + index = offset + xpaddr; + buf = data; + + ppc_md.nvram_write(&buf, 1, &index); +} + +EXPORT_SYMBOL(pmac_get_partition); +EXPORT_SYMBOL(pmac_xpram_read); +EXPORT_SYMBOL(pmac_xpram_write); --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/pmac_pci.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,755 @@ +/* + * Support for PCI bridges found on Power Macintoshes. + * At present the "bandit" and "chaos" bridges are supported. + * Fortunately you access configuration space in the same + * way with either bridge. + * + * Copyright (C) 2003 Benjamin Herrenschmuidt (benh@kernel.crashing.org) + * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "pci.h" +#include "pmac.h" + +#define DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +extern int pci_probe_only; +extern int pci_read_irq_line(struct pci_dev *pci_dev); + +/* XXX Could be per-controller, but I don't think we risk anything by + * assuming we won't have both UniNorth and Bandit */ +static int has_uninorth; +static struct pci_controller *u3_agp; +u8 pci_cache_line_size; +struct pci_dev *k2_skiplist[2]; + +static int __init fixup_one_level_bus_range(struct device_node *node, int higher) +{ + for (; node != 0;node = node->sibling) { + int * bus_range; + unsigned int *class_code; + int len; + + /* For PCI<->PCI bridges or CardBus bridges, we go down */ + class_code = (unsigned int *) get_property(node, "class-code", 0); + if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && + (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) + continue; + bus_range = (int *) get_property(node, "bus-range", &len); + if (bus_range != NULL && len > 2 * sizeof(int)) { + if (bus_range[1] > higher) + higher = bus_range[1]; + } + higher = fixup_one_level_bus_range(node->child, higher); + } + return higher; +} + +/* This routine fixes the "bus-range" property of all bridges in the + * system since they tend to have their "last" member wrong on macs + * + * Note that the bus numbers manipulated here are OF bus numbers, they + * are not Linux bus numbers. + */ +static void __init fixup_bus_range(struct device_node *bridge) +{ + int * bus_range; + int len; + + /* Lookup the "bus-range" property for the hose */ + bus_range = (int *) get_property(bridge, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s\n", + bridge->full_name); + return; + } + bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); +} + +/* + * Apple MacRISC (U3, UniNorth, Bandit, Chaos) PCI controllers. + * + * The "Bandit" version is present in all early PCI PowerMacs, + * and up to the first ones using Grackle. Some machines may + * have 2 bandit controllers (2 PCI busses). + * + * "Chaos" is used in some "Bandit"-type machines as a bridge + * for the separate display bus. It is accessed the same + * way as bandit, but cannot be probed for devices. It therefore + * has its own config access functions. + * + * The "UniNorth" version is present in all Core99 machines + * (iBook, G4, new IMacs, and all the recent Apple machines). + * It contains 3 controllers in one ASIC. + * + * The U3 is the bridge used on G5 machines. It contains on + * AGP bus which is dealt with the old UniNorth access routines + * and an HyperTransport bus which uses its own set of access + * functions. + */ + +#define MACRISC_CFA0(devfn, off) \ + ((1 << (unsigned long)PCI_SLOT(dev_fn)) \ + | (((unsigned long)PCI_FUNC(dev_fn)) << 8) \ + | (((unsigned long)(off)) & 0xFCUL)) + +#define MACRISC_CFA1(bus, devfn, off) \ + ((((unsigned long)(bus)) << 16) \ + |(((unsigned long)(devfn)) << 8) \ + |(((unsigned long)(off)) & 0xFCUL) \ + |1UL) + +static unsigned long __pmac macrisc_cfg_access(struct pci_controller* hose, + u8 bus, u8 dev_fn, u8 offset) +{ + unsigned int caddr; + + if (bus == hose->first_busno) { + if (dev_fn < (11 << 3)) + return 0; + caddr = MACRISC_CFA0(dev_fn, offset); + } else + caddr = MACRISC_CFA1(bus, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= has_uninorth ? 0x07 : 0x03; + return ((unsigned long)hose->cfg_data) + offset; +} + +static int __pmac macrisc_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + struct device_node *busdn; + unsigned long addr; + int i; + + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = bus->sysdata; /* must be a phb */ + if (busdn == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + hose = busdn->phb; + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* + * When a device in K2 is powered down, we die on config + * cycle accesses. Fix that here. + */ + for (i=0; i<2; i++) + if (k2_skiplist[i] && k2_skiplist[i]->bus == bus && + k2_skiplist[i]->devfn == devfn) { + switch (len) { + case 1: + *val = 0xff; break; + case 2: + *val = 0xffff; break; + default: + *val = 0xfffffffful; break; + } + return PCIBIOS_SUCCESSFUL; + } + + addr = macrisc_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8((u8 *)addr); + break; + case 2: + *val = in_le16((u16 *)addr); + break; + default: + *val = in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int __pmac macrisc_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + struct device_node *busdn; + unsigned long addr; + int i; + + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = bus->sysdata; /* must be a phb */ + if (busdn == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + hose = busdn->phb; + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* + * When a device in K2 is powered down, we die on config + * cycle accesses. Fix that here. + */ + for (i=0; i<2; i++) + if (k2_skiplist[i] && k2_skiplist[i]->bus == bus && + k2_skiplist[i]->devfn == devfn) + return PCIBIOS_SUCCESSFUL; + + addr = macrisc_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8((u8 *)addr, val); + (void) in_8((u8 *)addr); + break; + case 2: + out_le16((u16 *)addr, val); + (void) in_le16((u16 *)addr); + break; + default: + out_le32((u32 *)addr, val); + (void) in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops macrisc_pci_ops = +{ + macrisc_read_config, + macrisc_write_config +}; + +/* + * These versions of U3 HyperTransport config space access ops do not + * implement self-view of the HT host yet + */ + +#define U3_HT_CFA0(devfn, off) \ + ((((unsigned long)devfn) << 8) | offset) +#define U3_HT_CFA1(bus, devfn, off) \ + (U3_HT_CFA0(devfn, off) \ + + (((unsigned long)bus) << 16) \ + + 0x01000000UL) + +static unsigned long __pmac u3_ht_cfg_access(struct pci_controller* hose, + u8 bus, u8 devfn, u8 offset) +{ + if (bus == hose->first_busno) { + /* For now, we don't self probe U3 HT bridge */ + if (PCI_FUNC(devfn) != 0 || PCI_SLOT(devfn) > 7 || + PCI_SLOT(devfn) < 1) + return 0; + return ((unsigned long)hose->cfg_data) + U3_HT_CFA0(devfn, offset); + } else + return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset); +} + +static int __pmac u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + struct device_node *busdn; + unsigned long addr; + + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = bus->sysdata; /* must be a phb */ + if (busdn == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + hose = busdn->phb; + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8((u8 *)addr); + break; + case 2: + *val = in_le16((u16 *)addr); + break; + default: + *val = in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int __pmac u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + struct device_node *busdn; + unsigned long addr; + + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = bus->sysdata; /* must be a phb */ + if (busdn == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + hose = busdn->phb; + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8((u8 *)addr, val); + (void) in_8((u8 *)addr); + break; + case 2: + out_le16((u16 *)addr, val); + (void) in_le16((u16 *)addr); + break; + default: + out_le32((u32 *)addr, val); + (void) in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u3_ht_pci_ops = +{ + u3_ht_read_config, + u3_ht_write_config +}; + +static void __init setup_u3_agp(struct pci_controller* hose) +{ + /* On G5, we move AGP up to high bus number so we don't need + * to reassign bus numbers for HT. If we ever have P2P bridges + * on AGP, we'll have to move pci_assign_all_busses to the + * pci_controller structure so we enable it for AGP and not for + * HT childs. + * We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->first_busno = 0xf0; + hose->last_busno = 0xff; + has_uninorth = 1; + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + + u3_agp = hose; +} + +static void __init setup_u3_ht(struct pci_controller* hose) +{ + struct device_node *np = (struct device_node *)hose->arch_data; + int i, cur; + + hose->ops = &u3_ht_pci_ops; + + /* We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->cfg_data = (volatile unsigned char *)ioremap(0xf2000000, 0x02000000); + + /* + * /ht node doesn't expose a "ranges" property, so we "remove" regions that + * have been allocated to AGP. So far, this version of the code doesn't assign + * any of the 0xfxxxxxxx "fine" memory regions to /ht. + * We need to fix that sooner or later by either parsing all child "ranges" + * properties or figuring out the U3 address space decoding logic and + * then read it's configuration register (if any). + */ + hose->io_base_phys = 0xf4000000 + 0x00400000; + hose->io_base_virt = ioremap(hose->io_base_phys, 0x00400000); + isa_io_base = pci_io_base = (unsigned long) hose->io_base_virt; + hose->io_resource.name = np->full_name; + hose->io_resource.start = 0; + hose->io_resource.end = 0x003fffff; + hose->io_resource.flags = IORESOURCE_IO; + hose->pci_mem_offset = 0; + hose->first_busno = 0; + hose->last_busno = 0xef; + hose->mem_resources[0].name = np->full_name; + hose->mem_resources[0].start = 0x80000000; + hose->mem_resources[0].end = 0xefffffff; + hose->mem_resources[0].flags = IORESOURCE_MEM; + + if (u3_agp == NULL) { + DBG("U3 has no AGP, using full resource range\n"); + return; + } + + /* We "remove" the AGP resources from the resources allocated to HT, that + * is we create "holes". However, that code does assumptions that so far + * happen to be true (cross fingers...), typically that resources in the + * AGP node are properly ordered + */ + cur = 0; + for (i=0; i<3; i++) { + struct resource *res = &u3_agp->mem_resources[i]; + if (res->flags != IORESOURCE_MEM) + continue; + /* We don't care about "fine" resources */ + if (res->start >= 0xf0000000) + continue; + /* Check if it's just a matter of "shrinking" us in one direction */ + if (hose->mem_resources[cur].start == res->start) { + DBG("U3/HT: shrink start of %d, %08lx -> %08lx\n", + cur, hose->mem_resources[cur].start, res->end + 1); + hose->mem_resources[cur].start = res->end + 1; + continue; + } + if (hose->mem_resources[cur].end == res->end) { + DBG("U3/HT: shrink end of %d, %08lx -> %08lx\n", + cur, hose->mem_resources[cur].end, res->start - 1); + hose->mem_resources[cur].end = res->start - 1; + continue; + } + /* No, it's not the case, we need a hole */ + if (cur == 2) { + /* not enough resources for a hole, we drop part of the range */ + printk(KERN_WARNING "Running out of resources for /ht host !\n"); + hose->mem_resources[cur].end = res->start - 1; + continue; + } + cur++; + DBG("U3/HT: hole, %d end at %08lx, %d start at %08lx\n", + cur-1, res->start - 1, cur, res->end + 1); + hose->mem_resources[cur].name = np->full_name; + hose->mem_resources[cur].flags = IORESOURCE_MEM; + hose->mem_resources[cur].start = res->end + 1; + hose->mem_resources[cur].end = hose->mem_resources[cur-1].end; + hose->mem_resources[cur-1].end = res->start - 1; + } +} + +static void __init pmac_process_bridge_OF_ranges(struct pci_controller *hose, + struct device_node *dev, int primary) +{ + static unsigned int static_lc_ranges[2024]; + unsigned int *dt_ranges, *lc_ranges, *ranges, *prev; + unsigned int size; + int rlen = 0, orig_rlen; + int memno = 0; + struct resource *res; + int np, na = prom_n_addr_cells(dev); + + np = na + 5; + + /* First we try to merge ranges to fix a problem with some pmacs + * that can have more than 3 ranges, fortunately using contiguous + * addresses -- BenH + */ + dt_ranges = (unsigned int *) get_property(dev, "ranges", &rlen); + if (!dt_ranges) + return; + /* lc_ranges = (unsigned int *) alloc_bootmem(rlen);*/ + lc_ranges = static_lc_ranges; + if (!lc_ranges) + return; /* what can we do here ? */ + memcpy(lc_ranges, dt_ranges, rlen); + orig_rlen = rlen; + + /* Let's work on a copy of the "ranges" property instead of damaging + * the device-tree image in memory + */ + ranges = lc_ranges; + prev = NULL; + while ((rlen -= np * sizeof(unsigned int)) >= 0) { + if (prev) { + if (prev[0] == ranges[0] && prev[1] == ranges[1] && + (prev[2] + prev[na+4]) == ranges[2] && + (prev[na+2] + prev[na+4]) == ranges[na+2]) { + prev[na+4] += ranges[na+4]; + ranges[0] = 0; + ranges += np; + continue; + } + } + prev = ranges; + ranges += np; + } + + /* + * The ranges property is laid out as an array of elements, + * each of which comprises: + * cells 0 - 2: a PCI address + * cells 3 or 3+4: a CPU physical address + * (size depending on dev->n_addr_cells) + * cells 4+5 or 5+6: the size of the range + */ + ranges = lc_ranges; + rlen = orig_rlen; + while (ranges && (rlen -= np * sizeof(unsigned int)) >= 0) { + res = NULL; + size = ranges[na+4]; + switch (ranges[0] >> 24) { + case 1: /* I/O space */ + if (ranges[2] != 0) + break; + hose->io_base_phys = ranges[na+2]; + /* limit I/O space to 16MB */ + if (size > 0x01000000) + size = 0x01000000; + hose->io_base_virt = ioremap(ranges[na+2], size); + if (primary) + isa_io_base = (unsigned long) hose->io_base_virt; + res = &hose->io_resource; + res->flags = IORESOURCE_IO; + res->start = ranges[2]; + break; + case 2: /* memory space */ + memno = 0; + if (ranges[1] == 0 && ranges[2] == 0 + && ranges[na+4] <= (16 << 20)) { + /* 1st 16MB, i.e. ISA memory area */ +#if 0 + if (primary) + isa_mem_base = ranges[na+2]; +#endif + memno = 1; + } + while (memno < 3 && hose->mem_resources[memno].flags) + ++memno; + if (memno == 0) + hose->pci_mem_offset = ranges[na+2] - ranges[2]; + if (memno < 3) { + res = &hose->mem_resources[memno]; + res->flags = IORESOURCE_MEM; + res->start = ranges[na+2]; + } + break; + } + if (res != NULL) { + res->name = dev->full_name; + res->end = res->start + size - 1; + res->parent = NULL; + res->sibling = NULL; + res->child = NULL; + } + ranges += np; + } +} + +/* + * We assume that if we have a G3 powermac, we have one bridge called + * "pci" (a MPC106) and no bandit or chaos bridges, and contrariwise, + * if we have one or more bandit or chaos bridges, we don't have a MPC106. + */ +static int __init add_bridge(struct device_node *dev) +{ + int len; + struct pci_controller *hose; + char* disp_name; + int *bus_range; + int primary = 1; + struct property *of_prop; + + DBG("Adding PCI host bridge %s\n", dev->full_name); + + bus_range = (int *) get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n", + dev->full_name); + } + + hose = pci_alloc_pci_controller(phb_type_apple); + if (!hose) + return -ENOMEM; + hose->arch_data = dev; + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + of_prop = (struct property *)alloc_bootmem(sizeof(struct property) + + sizeof(hose->global_number)); + if (of_prop) { + memset(of_prop, 0, sizeof(struct property)); + of_prop->name = "linux,pci-domain"; + of_prop->length = sizeof(hose->global_number); + of_prop->value = (unsigned char *)&of_prop[1]; + memcpy(of_prop->value, &hose->global_number, sizeof(hose->global_number)); + prom_add_property(dev, of_prop); + } + + disp_name = NULL; + if (device_is_compatible(dev, "u3-agp")) { + setup_u3_agp(hose); + disp_name = "U3-AGP"; + primary = 0; + } else if (device_is_compatible(dev, "u3-ht")) { + setup_u3_ht(hose); + disp_name = "U3-HT"; + primary = 1; + } + printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n", + disp_name, hose->first_busno, hose->last_busno); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pmac_process_bridge_OF_ranges(hose, dev, primary); + + /* Fixup "bus-range" OF property */ + fixup_bus_range(dev); + + return 0; +} + + +void __init pmac_pcibios_fixup(void) +{ + struct pci_dev *dev = NULL; + + while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) + pci_read_irq_line(dev); + + pci_fix_bus_sysdata(); +} + +static void __init pmac_fixup_phb_resources(void) +{ + struct pci_controller *hose; + + for (hose = hose_head; hose; hose = hose->next) { + unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; + hose->io_resource.start += offset; + hose->io_resource.end += offset; + printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n", + hose->global_number, + hose->io_resource.start, hose->io_resource.end); + } +} + +void __init pmac_pci_init(void) +{ + struct device_node *np, *root; + struct device_node *ht = NULL; + + /* Probe root PCI hosts, that is on U3 the AGP host and the + * HyperTransport host. That one is actually "kept" around + * and actually added last as it's resource management relies + * on the AGP resources to have been setup first + */ + root = of_find_node_by_path("/"); + if (root == NULL) { + printk(KERN_CRIT "pmac_find_bridges: can't find root of device tree\n"); + return; + } + for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) { + if (np->name == NULL) + continue; + if (strcmp(np->name, "pci") == 0) { + if (add_bridge(np) == 0) + of_node_get(np); + } + if (strcmp(np->name, "ht") == 0) { + of_node_get(np); + ht = np; + } + } + of_node_put(root); + + /* Now setup the HyperTransport host if we found any + */ + if (ht && add_bridge(ht) != 0) + of_node_put(ht); + + /* Fixup the IO resources on our host bridges as the common code + * does it only for childs of the host bridges + */ + pmac_fixup_phb_resources(); + + /* Setup the linkage between OF nodes and PHBs */ + pci_devs_phb_init(); + + pmac_check_ht_link(); + + /* Tell pci.c to use the common resource allocation mecanism */ + pci_probe_only = 0; + + /* HT don't do more than 64 bytes transfers. FIXME: Deal with + * the exception of U3/AGP (hook into pci_set_mwi) + */ + pci_cache_line_size = 16; /* 64 bytes */ +} + +/* + * Disable second function on K2-SATA, it's broken + * and disable IO BARs on first one + */ +void fixup_k2_sata(struct pci_dev* dev) +{ + int i; + u16 cmd; + + if (PCI_FUNC(dev->devfn) > 0) { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + pci_write_config_word(dev, PCI_COMMAND, cmd); + for (i = 0; i < 6; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0); + } + } else { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~PCI_COMMAND_IO; + pci_write_config_word(dev, PCI_COMMAND, cmd); + for (i = 0; i < 5; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0); + } + } +} --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/pmac_setup.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,504 @@ +/* + * arch/ppc/platforms/setup.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) + * + * Derived from "arch/alpha/kernel/setup.c" + * Copyright (C) 1995 Linus Torvalds + * + * Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +/* + * bootup setup stuff.. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pmac.h" + +extern char saved_command_line[]; +static int current_root_goodness = -1; +#define DEFAULT_ROOT_DEVICE Root_SDA1 /* sda1 - slightly silly choice */ + +extern int powersave_nap; +int sccdbg; + +extern void udbg_init_scc(struct device_node *np); + +#ifdef CONFIG_BOOTX_TEXT +void pmac_progress(char *s, unsigned short hex); +#endif + +void __pmac pmac_show_cpuinfo(struct seq_file *m) +{ + struct device_node *np; + char *pp; + int plen; + char* mbname; + int mbmodel = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, + PMAC_MB_INFO_MODEL, 0); + unsigned int mbflags = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, + PMAC_MB_INFO_FLAGS, 0); + + if (pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, PMAC_MB_INFO_NAME, + (long)&mbname) != 0) + mbname = "Unknown"; + + /* find motherboard type */ + seq_printf(m, "machine\t\t: "); + np = find_devices("device-tree"); + if (np != NULL) { + pp = (char *) get_property(np, "model", NULL); + if (pp != NULL) + seq_printf(m, "%s\n", pp); + else + seq_printf(m, "PowerMac\n"); + pp = (char *) get_property(np, "compatible", &plen); + if (pp != NULL) { + seq_printf(m, "motherboard\t:"); + while (plen > 0) { + int l = strlen(pp) + 1; + seq_printf(m, " %s", pp); + plen -= l; + pp += l; + } + seq_printf(m, "\n"); + } + } else + seq_printf(m, "PowerMac\n"); + + /* print parsed model */ + seq_printf(m, "detected as\t: %d (%s)\n", mbmodel, mbname); + seq_printf(m, "pmac flags\t: %08x\n", mbflags); + seq_printf(m, "memory\t\t: %luMB\n", lmb_phys_mem_size() >> 20); + + /* Checks "l2cr-value" property in the registry */ + np = find_devices("cpus"); + if (np == 0) + np = find_type_devices("cpu"); + if (np != 0) { + unsigned int *l2cr = (unsigned int *) + get_property(np, "l2cr-value", NULL); + if (l2cr != 0) { + seq_printf(m, "l2cr override\t: 0x%x\n", *l2cr); + } + } + + /* Indicate newworld */ + seq_printf(m, "pmac-generation\t: NewWorld\n"); +} + + +void __init pmac_setup_arch(void) +{ + struct device_node *cpu; + int *fp; + unsigned long pvr; + + pvr = PVR_VER(mfspr(PVR)); + + /* Set loops_per_jiffy to a half-way reasonable value, + for use until calibrate_delay gets called. */ + cpu = find_type_devices("cpu"); + if (cpu != 0) { + fp = (int *) get_property(cpu, "clock-frequency", NULL); + if (fp != 0) { + if (pvr == 4 || pvr >= 8) + /* 604, G3, G4 etc. */ + loops_per_jiffy = *fp / HZ; + else + /* 601, 603, etc. */ + loops_per_jiffy = *fp / (2*HZ); + } else + loops_per_jiffy = 50000000 / HZ; + } + + /* We can NAP */ + powersave_nap = 1; + + /* Initialize the PMU */ + find_via_pmu(); + + /* Init NVRAM access */ + pmac_nvram_init(); + + /* Setup SMP callback */ +#ifdef CONFIG_SMP + pmac_setup_smp(); +#endif + /* Setup the PCI DMA to "direct" for now, until we have proper + * DART support and can deal with more than 2Gb of RAM + */ + pci_dma_init_direct(); + + /* Lookup PCI hosts */ + pmac_pci_init(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif +} + +extern char *bootpath; +extern char *bootdevice; +void *boot_host; +int boot_target; +int boot_part; +extern dev_t boot_dev; + +#ifdef CONFIG_SCSI +void __init note_scsi_host(struct device_node *node, void *host) +{ + int l; + char *p; + + l = strlen(node->full_name); + if (bootpath != NULL && bootdevice != NULL + && strncmp(node->full_name, bootdevice, l) == 0 + && (bootdevice[l] == '/' || bootdevice[l] == 0)) { + boot_host = host; + /* + * There's a bug in OF 1.0.5. (Why am I not surprised.) + * If you pass a path like scsi/sd@1:0 to canon, it returns + * something like /bandit@F2000000/gc@10/53c94@10000/sd@0,0 + * That is, the scsi target number doesn't get preserved. + * So we pick the target number out of bootpath and use that. + */ + p = strstr(bootpath, "/sd@"); + if (p != NULL) { + p += 4; + boot_target = simple_strtoul(p, NULL, 10); + p = strchr(p, ':'); + if (p != NULL) + boot_part = simple_strtoul(p + 1, NULL, 10); + } + } +} +#endif + +#if defined(CONFIG_BLK_DEV_IDE) && defined(CONFIG_BLK_DEV_IDE_PMAC) +static dev_t __init find_ide_boot(void) +{ + char *p; + int n; + dev_t __init pmac_find_ide_boot(char *bootdevice, int n); + + if (bootdevice == NULL) + return 0; + p = strrchr(bootdevice, '/'); + if (p == NULL) + return 0; + n = p - bootdevice; + + return pmac_find_ide_boot(bootdevice, n); +} +#endif /* CONFIG_BLK_DEV_IDE && CONFIG_BLK_DEV_IDE_PMAC */ + +void __init find_boot_device(void) +{ +#if defined(CONFIG_BLK_DEV_IDE) && defined(CONFIG_BLK_DEV_IDE_PMAC) + boot_dev = find_ide_boot(); +#endif +} + +static int initializing = 1; + +static int pmac_late_init(void) +{ + initializing = 0; + return 0; +} + +late_initcall(pmac_late_init); + +/* can't be __init - can be called whenever a disk is first accessed */ +void __pmac note_bootable_part(dev_t dev, int part, int goodness) +{ + static int found_boot = 0; + char *p; + + if (!initializing) + return; + if ((goodness <= current_root_goodness) && + ROOT_DEV != DEFAULT_ROOT_DEVICE) + return; + p = strstr(saved_command_line, "root="); + if (p != NULL && (p == saved_command_line || p[-1] == ' ')) + return; + + if (!found_boot) { + find_boot_device(); + found_boot = 1; + } + if (!boot_dev || dev == boot_dev) { + ROOT_DEV = dev + part; + boot_dev = 0; + current_root_goodness = goodness; + } +} + +void __pmac pmac_restart(char *cmd) +{ + pmu_restart(); +} + +void __pmac pmac_power_off(void) +{ + pmu_shutdown(); +} + +void __pmac pmac_halt(void) +{ + pmac_power_off(); +} + +#ifdef CONFIG_BOOTX_TEXT +static int dummy_getc_poll(void) +{ + return -1; +} + +static unsigned char dummy_getc(void) +{ + return 0; +} + +static void btext_putc(unsigned char c) +{ + btext_drawchar(c); +} +#endif /* CONFIG_BOOTX_TEXT */ + +/* + * Early initialization. + * Relocation is on but do not reference unbolted pages + * Also, device-tree hasn't been "finished", so don't muck with + * it too much + */ +void __init pmac_init_early(void) +{ + hpte_init_pSeries(); + +#ifdef CONFIG_BOOTX_TEXT + ppc_md.udbg_putc = btext_putc; + ppc_md.udbg_getc = dummy_getc; + ppc_md.udbg_getc_poll = dummy_getc_poll; +#endif /* CONFIG_BOOTX_TEXT */ +} + +extern void* OpenPIC_Addr; +extern void* OpenPIC2_Addr; +extern u_int OpenPIC_NumInitSenses; +extern u_char *OpenPIC_InitSenses; +extern void openpic_init(int main_pic, int offset, unsigned char* chrp_ack, + int programmer_switch_irq); +extern void openpic2_init(int offset); +extern int openpic_get_irq(struct pt_regs *regs); +extern int openpic2_get_irq(struct pt_regs *regs); + +static int pmac_cascade_irq = -1; + +static irqreturn_t pmac_u3_do_cascade(int cpl, void *dev_id, struct pt_regs *regs) +{ + int irq; + + for (;;) { + irq = openpic2_get_irq(regs); + if (irq == -1) + break; + ppc_irq_dispatch_handler(regs, irq); + } + return IRQ_HANDLED; +} + +static __init void pmac_init_IRQ(void) +{ + struct device_node *irqctrler = NULL; + struct device_node *irqctrler2 = NULL; + struct device_node *np = NULL; + + /* We first try to detect Apple's new Core99 chipset, since mac-io + * is quite different on those machines and contains an IBM MPIC2. + */ + while ((np = of_find_node_by_type(np, "open-pic")) != NULL) { + struct device_node *parent = of_get_parent(np); + if (parent && !strcmp(parent->name, "u3")) + irqctrler2 = of_node_get(np); + else + irqctrler = of_node_get(np); + of_node_put(parent); + } + if (irqctrler != NULL && irqctrler->n_addrs > 0) { + unsigned char senses[128]; + + printk(KERN_INFO "PowerMac using OpenPIC irq controller at 0x%08x\n", + (unsigned int)irqctrler->addrs[0].address); + + prom_get_irq_senses(senses, 0, 128); + OpenPIC_InitSenses = senses; + OpenPIC_NumInitSenses = 128; + OpenPIC_Addr = ioremap(irqctrler->addrs[0].address, + irqctrler->addrs[0].size); + openpic_init(1, 0, NULL, -1); + + if (irqctrler2 != NULL && irqctrler2->n_intrs > 0 && + irqctrler2->n_addrs > 0) { + printk(KERN_INFO "Slave OpenPIC at 0x%08x hooked on IRQ %d\n", + (u32)irqctrler2->addrs[0].address, + irqctrler2->intrs[0].line); + pmac_call_feature(PMAC_FTR_ENABLE_MPIC, irqctrler2, 0, 0); + OpenPIC2_Addr = ioremap(irqctrler2->addrs[0].address, + irqctrler2->addrs[0].size); + prom_get_irq_senses(senses, 128, 128 + 128); + OpenPIC_InitSenses = senses; + OpenPIC_NumInitSenses = 128; + openpic2_init(128); + pmac_cascade_irq = irqctrler2->intrs[0].line; + } + } + of_node_put(irqctrler); + of_node_put(irqctrler2); +} + +/* We cannot do request_irq too early ... Right now, we get the + * cascade as a core_initcall, which should be fine for our needs + */ +static int __init pmac_irq_cascade_init(void) +{ + if (request_irq(pmac_cascade_irq, pmac_u3_do_cascade, 0, + "U3->K2 Cascade", NULL)) + printk(KERN_ERR "Unable to get OpenPIC IRQ for cascade\n"); + return 0; +} + +core_initcall(pmac_irq_cascade_init); + +void __init pmac_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + /* Probe motherboard chipset */ + pmac_feature_init(); + + /* Init SCC */ + if (strstr(cmd_line, "sccdbg")) { + sccdbg = 1; + udbg_init_scc(NULL); + } + + /* Fill up the machine description */ + ppc_md.setup_arch = pmac_setup_arch; + ppc_md.get_cpuinfo = pmac_show_cpuinfo; + + ppc_md.init_IRQ = pmac_init_IRQ; + ppc_md.get_irq = openpic_get_irq; + + ppc_md.pcibios_fixup = pmac_pcibios_fixup; + + ppc_md.restart = pmac_restart; + ppc_md.power_off = pmac_power_off; + ppc_md.halt = pmac_halt; + + ppc_md.get_boot_time = pmac_get_boot_time; + ppc_md.set_rtc_time = pmac_set_rtc_time; + ppc_md.get_rtc_time = pmac_get_rtc_time; + ppc_md.calibrate_decr = pmac_calibrate_decr; + + ppc_md.feature_call = pmac_do_feature_call; + + +#ifdef CONFIG_BOOTX_TEXT + ppc_md.progress = pmac_progress; +#endif /* CONFIG_BOOTX_TEXT */ + + if (ppc_md.progress) ppc_md.progress("pmac_init(): exit", 0); + +} + +#ifdef CONFIG_BOOTX_TEXT +void __init pmac_progress(char *s, unsigned short hex) +{ + if (sccdbg) { + udbg_puts(s); + udbg_putc('\n'); + } + else if (boot_text_mapped) { + btext_drawstring(s); + btext_drawchar('\n'); + } +} +#endif /* CONFIG_BOOTX_TEXT */ + +static int __init pmac_declare_of_platform_devices(void) +{ + struct device_node *np; + + np = find_devices("u3"); + if (np) { + for (np = np->child; np != NULL; np = np->sibling) + if (strncmp(np->name, "i2c", 3) == 0) { + of_platform_device_create(np, "u3-i2c"); + break; + } + } + + return 0; +} + +device_initcall(pmac_declare_of_platform_devices); --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/pmac_smp.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,174 @@ +/* + * SMP support for power macintosh. + * + * We support both the old "powersurge" SMP architecture + * and the current Core99 (G4 PowerMac) machines. + * + * Note that we don't support the very first rev. of + * Apple/DayStar 2 CPUs board, the one with the funky + * watchdog. Hopefully, none of these should be there except + * maybe internally to Apple. I should probably still add some + * code to detect this card though and disable SMP. --BenH. + * + * Support Macintosh G4 SMP by Troy Benjegerdes (hozer@drgw.net) + * and Ben Herrenschmidt . + * + * Support for DayStar quad CPU cards + * Copyright (C) XLR8, Inc. 1994-2000 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#define __KERNEL_SYSCALLS__ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "open_pic.h" + +extern void pmac_secondary_start_1(void); +extern void pmac_secondary_start_2(void); +extern void pmac_secondary_start_3(void); + +extern void smp_openpic_message_pass(int target, int msg, unsigned long data, int wait); + +extern struct smp_ops_t *smp_ops; + +static int __init smp_core99_probe(void) +{ + struct device_node *cpus; + int ncpus = 1; + + /* Maybe use systemconfiguration here ? */ + if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345); + cpus = find_type_devices("cpu"); + if (cpus == NULL) + return 0; + + while ((cpus = cpus->next) != NULL) + ++ncpus; + + printk(KERN_INFO "PowerMac SMP probe found %d cpus\n", ncpus); + + if (ncpus > 1) + openpic_request_IPIs(); + + return ncpus; +} + +static void __init smp_core99_kick_cpu(int nr) +{ + int save_vector; + unsigned long new_vector; + unsigned long flags; + volatile unsigned int *vector + = ((volatile unsigned int *)(KERNELBASE+0x100)); + + if (nr < 1 || nr > 3) + return; + if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu", 0x346); + + local_irq_save(flags); + local_irq_disable(); + + /* Save reset vector */ + save_vector = *vector; + + /* Setup fake reset vector that does + * b .pmac_secondary_start - KERNELBASE + */ + switch(nr) { + case 1: + new_vector = (unsigned long)pmac_secondary_start_1; + break; + case 2: + new_vector = (unsigned long)pmac_secondary_start_2; + break; + case 3: + new_vector = (unsigned long)pmac_secondary_start_3; + break; + } + *vector = 0x48000002 + (new_vector - KERNELBASE); + + /* flush data cache and inval instruction cache */ + flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); + + /* Put some life in our friend */ + pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); + paca[nr].xProcStart = 1; + + /* FIXME: We wait a bit for the CPU to take the exception, I should + * instead wait for the entry code to set something for me. Well, + * ideally, all that crap will be done in prom.c and the CPU left + * in a RAM-based wait loop like CHRP. + */ + mdelay(1); + + /* Restore our exception vector */ + *vector = save_vector; + flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); + + local_irq_restore(flags); + if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); +} + +static void __init smp_core99_setup_cpu(int cpu_nr) +{ + /* Setup openpic */ + do_openpic_setup_cpu(); + + if (cpu_nr == 0) { + extern void g5_phy_disable_cpu1(void); + + /* If we didn't start the second CPU, we must take + * it off the bus + */ + if (num_online_cpus() < 2) + g5_phy_disable_cpu1(); + if (ppc_md.progress) ppc_md.progress("core99_setup_cpu 0 done", 0x349); + } +} + +extern void smp_generic_give_timebase(void); +extern void smp_generic_take_timebase(void); + +struct smp_ops_t core99_smp_ops __pmacdata = { + .message_pass = smp_openpic_message_pass, + .probe = smp_core99_probe, + .kick_cpu = smp_core99_kick_cpu, + .setup_cpu = smp_core99_setup_cpu, + .give_timebase = smp_generic_give_timebase, + .take_timebase = smp_generic_take_timebase, +}; + +void __init pmac_setup_smp(void) +{ + smp_ops = &core99_smp_ops; +} --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/pmac_time.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,157 @@ +/* + * Support for periodic interrupts (100 per second) and for getting + * the current time from the RTC on Power Macintoshes. + * + * We use the decrementer register for our periodic interrupts. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996 Paul Mackerras. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +extern void setup_default_decr(void); + +/* Apparently the RTC stores seconds since 1 Jan 1904 */ +#define RTC_OFFSET 2082844800 + +/* + * Calibrate the decrementer frequency with the VIA timer 1. + */ +#define VIA_TIMER_FREQ_6 4700000 /* time 1 frequency * 6 */ + +extern struct timezone sys_tz; +extern void to_tm(int tim, struct rtc_time * tm); + +void __pmac pmac_get_rtc_time(struct rtc_time *tm) +{ + struct adb_request req; + unsigned int now; + + /* Get the time from the RTC */ + if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0) + return; + while (!req.complete) + pmu_poll(); + if (req.reply_len != 4) + printk(KERN_ERR "pmac_get_rtc_time: got %d byte reply\n", + req.reply_len); + now = (req.reply[0] << 24) + (req.reply[1] << 16) + + (req.reply[2] << 8) + req.reply[3]; + DBG("get: %u -> %u\n", (int)now, (int)(now - RTC_OFFSET)); + now -= RTC_OFFSET; + + to_tm(now, tm); + tm->tm_year -= 1900; + tm->tm_mon -= 1; + + DBG("-> tm_mday: %d, tm_mon: %d, tm_year: %d, %d:%02d:%02d\n", + tm->tm_mday, tm->tm_mon, tm->tm_year, + tm->tm_hour, tm->tm_min, tm->tm_sec); +} + +int __pmac pmac_set_rtc_time(struct rtc_time *tm) +{ + struct adb_request req; + unsigned int nowtime; + + DBG("set: tm_mday: %d, tm_mon: %d, tm_year: %d, %d:%02d:%02d\n", + tm->tm_mday, tm->tm_mon, tm->tm_year, + tm->tm_hour, tm->tm_min, tm->tm_sec); + + nowtime = mktime(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, tm->tm_min, tm->tm_sec); + DBG("-> %u -> %u\n", (int)nowtime, (int)(nowtime + RTC_OFFSET)); + nowtime += RTC_OFFSET; + + if (pmu_request(&req, NULL, 5, PMU_SET_RTC, + nowtime >> 24, nowtime >> 16, nowtime >> 8, nowtime) < 0) + return 0; + while (!req.complete) + pmu_poll(); + if (req.reply_len != 0) + printk(KERN_ERR "pmac_set_rtc_time: got %d byte reply\n", + req.reply_len); + return 1; +} + +void __init pmac_get_boot_time(struct rtc_time *tm) +{ + pmac_get_rtc_time(tm); + +#ifdef disabled__CONFIG_NVRAM + s32 delta = 0; + int dst; + + delta = ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x9)) << 16; + delta |= ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xa)) << 8; + delta |= pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xb); + if (delta & 0x00800000UL) + delta |= 0xFF000000UL; + dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0); + printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60, + dst ? "on" : "off"); +#endif +} + +/* + * Query the OF and get the decr frequency. + * This was taken from the pmac time_init() when merging the prep/pmac + * time functions. + */ +void __init pmac_calibrate_decr(void) +{ + struct device_node *cpu; + unsigned int freq, *fp; + struct div_result divres; + + /* + * The cpu node should have a timebase-frequency property + * to tell us the rate at which the decrementer counts. + */ + cpu = find_type_devices("cpu"); + if (cpu == 0) + panic("can't find cpu node in time_init"); + fp = (unsigned int *) get_property(cpu, "timebase-frequency", NULL); + if (fp == 0) + panic("can't get cpu timebase frequency"); + freq = *fp; + printk("time_init: decrementer frequency = %u.%.6u MHz\n", + freq/1000000, freq%1000000); + tb_ticks_per_jiffy = freq / HZ; + tb_ticks_per_sec = tb_ticks_per_jiffy * HZ; + tb_ticks_per_usec = freq / 1000000; + tb_to_us = mulhwu_scale_factor(freq, 1000000); + div128_by_32( 1024*1024, 0, tb_ticks_per_sec, &divres ); + tb_to_xs = divres.result_low; + + setup_default_decr(); +} + --- linux-2.6.3-rc2/arch/ppc64/kernel/ppc_ksyms.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc64/kernel/ppc_ksyms.c 2004-02-12 00:40:52.000000000 -0800 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -48,7 +49,6 @@ #include #endif -extern int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); extern int do_signal(sigset_t *, struct pt_regs *); int abs(int); --- linux-2.6.3-rc2/arch/ppc64/kernel/prom.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc64/kernel/prom.c 2004-02-12 00:39:52.000000000 -0800 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include "open_pic.h" @@ -115,24 +117,15 @@ typedef unsigned long interpret_func(str static interpret_func interpret_pci_props; static interpret_func interpret_isa_props; static interpret_func interpret_root_props; +static interpret_func interpret_dbdma_props; +static interpret_func interpret_macio_props; #ifndef FB_MAX /* avoid pulling in all of the fb stuff */ #define FB_MAX 8 #endif - -struct prom_t prom = { - 0, /* entry */ - 0, /* chosen */ - 0, /* cpu */ - 0, /* stdout */ - 0, /* disp_node */ - {0,0,0,{0},NULL}, /* args */ - 0, /* version */ - 32, /* encode_phys_size */ - 0 /* bi_rec pointer */ -}; - +/* prom structure */ +struct prom_t prom; char *prom_display_paths[FB_MAX] __initdata = { 0, }; unsigned int prom_num_displays = 0; @@ -140,7 +133,6 @@ char *of_stdout_device = 0; extern struct rtas_t rtas; extern unsigned long klimit; -extern unsigned long embedded_sysmap_end; extern struct lmb lmb; #define MAX_PHB 16 * 3 // 16 Towers * 3 PHBs/tower @@ -189,15 +181,18 @@ void prom_dump_lmb(void); extern unsigned long reloc_offset(void); extern void enter_prom(void *dummy,...); +extern void copy_and_flush(unsigned long dest, unsigned long src, + unsigned long size, unsigned long offset); extern char cmd_line[512]; /* XXX */ unsigned long dev_tree_size; +unsigned long _get_PIR(void); #ifdef CONFIG_HMT struct { unsigned int pir; unsigned int threadid; -} hmt_thread_data[NR_CPUS] = {0}; +} hmt_thread_data[NR_CPUS]; #endif /* CONFIG_HMT */ char testString[] = "LINUX\n"; @@ -341,9 +336,14 @@ prom_initialize_naca(unsigned long mem) RELOC("d-cache-size"), &size, sizeof(size)); - call_prom(RELOC("getprop"), 4, 1, node, - RELOC("d-cache-line-size"), - &lsize, sizeof(lsize)); + if (_systemcfg->platform == PLATFORM_POWERMAC) + call_prom(RELOC("getprop"), 4, 1, node, + RELOC("d-cache-block-size"), + &lsize, sizeof(lsize)); + else + call_prom(RELOC("getprop"), 4, 1, node, + RELOC("d-cache-line-size"), + &lsize, sizeof(lsize)); _systemcfg->dCacheL1Size = size; _systemcfg->dCacheL1LineSize = lsize; @@ -354,9 +354,14 @@ prom_initialize_naca(unsigned long mem) RELOC("i-cache-size"), &size, sizeof(size)); - call_prom(RELOC("getprop"), 4, 1, node, - RELOC("i-cache-line-size"), - &lsize, sizeof(lsize)); + if (_systemcfg->platform == PLATFORM_POWERMAC) + call_prom(RELOC("getprop"), 4, 1, node, + RELOC("i-cache-block-size"), + &lsize, sizeof(lsize)); + else + call_prom(RELOC("getprop"), 4, 1, node, + RELOC("i-cache-line-size"), + &lsize, sizeof(lsize)); _systemcfg->iCacheL1Size = size; _systemcfg->iCacheL1LineSize = lsize; @@ -377,6 +382,8 @@ prom_initialize_naca(unsigned long mem) struct isa_reg_property reg; union pci_range ranges; + if (_systemcfg->platform == PLATFORM_POWERMAC) + continue; type[0] = 0; call_prom(RELOC("getprop"), 4, 1, node, RELOC("ibm,aix-loc"), type, sizeof(type)); @@ -407,24 +414,27 @@ prom_initialize_naca(unsigned long mem) } } - _naca->interrupt_controller = IC_INVALID; - for (node = 0; prom_next_node(&node); ) { - type[0] = 0; - call_prom(RELOC("getprop"), 4, 1, node, RELOC("name"), - type, sizeof(type)); - if (strcmp(type, RELOC("interrupt-controller"))) { - continue; - } - call_prom(RELOC("getprop"), 4, 1, node, RELOC("compatible"), - type, sizeof(type)); - if (strstr(type, RELOC("open-pic"))) { - _naca->interrupt_controller = IC_OPEN_PIC; - } else if (strstr(type, RELOC("ppc-xicp"))) { - _naca->interrupt_controller = IC_PPC_XIC; - } else { - prom_print(RELOC("prom: failed to recognize interrupt-controller\n")); + if (_systemcfg->platform == PLATFORM_POWERMAC) + _naca->interrupt_controller = IC_OPEN_PIC; + else { + _naca->interrupt_controller = IC_INVALID; + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + call_prom(RELOC("getprop"), 4, 1, node, RELOC("name"), + type, sizeof(type)); + if (strcmp(type, RELOC("interrupt-controller"))) + continue; + call_prom(RELOC("getprop"), 4, 1, node, RELOC("compatible"), + type, sizeof(type)); + if (strstr(type, RELOC("open-pic"))) + _naca->interrupt_controller = IC_OPEN_PIC; + else if (strstr(type, RELOC("ppc-xicp"))) + _naca->interrupt_controller = IC_PPC_XIC; + else + prom_print(RELOC("prom: failed to recognize" + " interrupt-controller\n")); + break; } - break; } if (_naca->interrupt_controller == IC_INVALID) { @@ -438,7 +448,8 @@ prom_initialize_naca(unsigned long mem) _systemcfg->physicalMemorySize = lmb_phys_mem_size(); - if (_systemcfg->platform == PLATFORM_PSERIES) { + if (_systemcfg->platform == PLATFORM_PSERIES || + _systemcfg->platform == PLATFORM_POWERMAC) { unsigned long rnd_mem_size, pteg_count; /* round mem_size up to next power of 2 */ @@ -517,12 +528,19 @@ prom_initialize_lmb(unsigned long mem) char type[64]; unsigned long i, offset = reloc_offset(); struct prom_t *_prom = PTRRELOC(&prom); + struct systemcfg *_systemcfg = RELOC(systemcfg); union lmb_reg_property reg; unsigned long lmb_base, lmb_size; unsigned long num_regs, bytes_per_reg = (_prom->encode_phys_size*2)/8; lmb_init(); + /* XXX Quick HACK. Proper fix is to drop those structures and properly use + * #address-cells. PowerMac has #size-cell set to 1 and #address-cells to 2 + */ + if (_systemcfg->platform == PLATFORM_POWERMAC) + bytes_per_reg = 12; + for (node = 0; prom_next_node(&node); ) { type[0] = 0; call_prom(RELOC("getprop"), 4, 1, node, RELOC("device_type"), @@ -535,7 +553,15 @@ prom_initialize_lmb(unsigned long mem) ®, sizeof(reg)) / bytes_per_reg; for (i=0; i < num_regs ;i++) { - if (_prom->encode_phys_size == 32) { + if (_systemcfg->platform == PLATFORM_POWERMAC) { + lmb_base = ((unsigned long)reg.addrPM[i].address_hi) << 32; + lmb_base |= (unsigned long)reg.addrPM[i].address_lo; + lmb_size = reg.addrPM[i].size; + if (lmb_base > 0x80000000ull) { + prom_print(RELOC("Skipping memory above 2Gb for now, not yet supported\n")); + continue; + } + } else if (_prom->encode_phys_size == 32) { lmb_base = reg.addr32[i].address; lmb_size = reg.addr32[i].size; } else { @@ -920,19 +946,47 @@ prom_hold_cpus(unsigned long mem) struct paca_struct *_xPaca = PTRRELOC(&paca[0]); struct prom_t *_prom = PTRRELOC(&prom); + /* On pmac, we just fill out the various global bitmasks and + * arrays indicating our CPUs are here, they are actually started + * later on from pmac_smp + */ + if (_systemcfg->platform == PLATFORM_POWERMAC) { + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + call_prom(RELOC("getprop"), 4, 1, node, RELOC("device_type"), + type, sizeof(type)); + if (strcmp(type, RELOC("cpu")) != 0) + continue; + reg = -1; + call_prom(RELOC("getprop"), 4, 1, node, RELOC("reg"), + ®, sizeof(reg)); + _xPaca[cpuid].xHwProcNum = reg; + +#ifdef CONFIG_SMP + cpu_set(cpuid, RELOC(cpu_available_map)); + cpu_set(cpuid, RELOC(cpu_possible_map)); + cpu_set(cpuid, RELOC(cpu_present_at_boot)); + if (reg == 0) + cpu_set(cpuid, RELOC(cpu_online_map)); +#endif /* CONFIG_SMP */ + cpuid++; + } + return; + } + /* Initially, we must have one active CPU. */ _systemcfg->processorCount = 1; #ifdef DEBUG_PROM prom_print(RELOC("prom_hold_cpus: start...\n")); prom_print(RELOC(" 1) spinloop = 0x")); - prom_print_hex(spinloop); + prom_print_hex((unsigned long)spinloop); prom_print_nl(); prom_print(RELOC(" 1) *spinloop = 0x")); prom_print_hex(*spinloop); prom_print_nl(); prom_print(RELOC(" 1) acknowledge = 0x")); - prom_print_hex(acknowledge); + prom_print_hex((unsigned long)acknowledge); prom_print_nl(); prom_print(RELOC(" 1) *acknowledge = 0x")); prom_print_hex(*acknowledge); @@ -1210,6 +1264,144 @@ smt_setup(void) _naca->smt_state = my_smt_enabled; } + +#ifdef CONFIG_BOOTX_TEXT + +/* This function will enable the early boot text when doing OF booting. This + * way, xmon output should work too + */ +static void __init setup_disp_fake_bi(ihandle dp) +{ + int width = 640, height = 480, depth = 8, pitch; + unsigned address; + struct pci_reg_property addrs[8]; + int i, naddrs; + char name[64]; + unsigned long offset = reloc_offset(); + char *getprop = RELOC("getprop"); + + prom_print(RELOC("Initializing fake screen: ")); + + memset(name, 0, sizeof(name)); + call_prom(getprop, 4, 1, dp, RELOC("name"), name, sizeof(name)); + name[sizeof(name)-1] = 0; + prom_print(name); + prom_print(RELOC("\n")); + call_prom(getprop, 4, 1, dp, RELOC("width"), &width, sizeof(width)); + call_prom(getprop, 4, 1, dp, RELOC("height"), &height, sizeof(height)); + call_prom(getprop, 4, 1, dp, RELOC("depth"), &depth, sizeof(depth)); + pitch = width * ((depth + 7) / 8); + call_prom(getprop, 4, 1, dp, RELOC("linebytes"), + &pitch, sizeof(pitch)); + if (pitch == 1) + pitch = 0x1000; /* for strange IBM display */ + address = 0; + + prom_print(RELOC("width ")); + prom_print_hex(width); + prom_print(RELOC(" height ")); + prom_print_hex(height); + prom_print(RELOC(" depth ")); + prom_print_hex(depth); + prom_print(RELOC(" linebytes ")); + prom_print_hex(pitch); + prom_print(RELOC("\n")); + + + call_prom(getprop, 4, 1, dp, RELOC("address"), + &address, sizeof(address)); + if (address == 0) { + /* look for an assigned address with a size of >= 1MB */ + naddrs = (int) call_prom(getprop, 4, 1, dp, + RELOC("assigned-addresses"), + addrs, sizeof(addrs)); + naddrs /= sizeof(struct pci_reg_property); + for (i = 0; i < naddrs; ++i) { + if (addrs[i].size_lo >= (1 << 20)) { + address = addrs[i].addr.a_lo; + /* use the BE aperture if possible */ + if (addrs[i].size_lo >= (16 << 20)) + address += (8 << 20); + break; + } + } + if (address == 0) { + prom_print(RELOC("Failed to get address of frame buffer\n")); + return; + } + } + btext_setup_display(width, height, depth, pitch, address); + prom_print(RELOC("Addr of fb: ")); + prom_print_hex(address); + prom_print_nl(); + RELOC(boot_text_mapped) = 0; +} +#endif /* CONFIG_BOOTX_TEXT */ + +static void __init prom_init_client_services(unsigned long pp) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + + /* Get a handle to the prom entry point before anything else */ + _prom->entry = pp; + + /* Init default value for phys size */ + _prom->encode_phys_size = 32; + + /* get a handle for the stdout device */ + _prom->chosen = (ihandle)call_prom(RELOC("finddevice"), 1, 1, + RELOC("/chosen")); + if ((long)_prom->chosen <= 0) + prom_panic(RELOC("cannot find chosen")); /* msg won't be printed :( */ + + /* get device tree root */ + _prom->root = (ihandle)call_prom(RELOC("finddevice"), 1, 1, RELOC("/")); + if ((long)_prom->root <= 0) + prom_panic(RELOC("cannot find device tree root")); /* msg won't be printed :( */ +} + +static void __init prom_init_stdout(void) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + u32 val; + + if ((long)call_prom(RELOC("getprop"), 4, 1, _prom->chosen, + RELOC("stdout"), &val, + sizeof(val)) <= 0) + prom_panic(RELOC("cannot find stdout")); + + _prom->stdout = (ihandle)(unsigned long)val; +} + +static int __init prom_find_machine_type(void) +{ + unsigned long offset = reloc_offset(); + struct prom_t *_prom = PTRRELOC(&prom); + char compat[256]; + int len, i = 0; + + len = (int)(long)call_prom(RELOC("getprop"), 4, 1, _prom->root, + RELOC("compatible"), + compat, sizeof(compat)-1); + if (len > 0) { + compat[len] = 0; + while (i < len) { + char *p = &compat[i]; + int sl = strlen(p); + if (sl == 0) + break; + if (strstr(p, RELOC("Power Macintosh")) || + strstr(p, RELOC("MacRISC4"))) + return PLATFORM_POWERMAC; + i += sl + 1; + } + } + /* Default to pSeries */ + return PLATFORM_PSERIES; +} + /* * We enter here early on, when the Open Firmware prom is still * handling exceptions and the MMU hash table for us. @@ -1220,7 +1412,7 @@ prom_init(unsigned long r3, unsigned lon unsigned long r6, unsigned long r7) { unsigned long mem; - ihandle prom_root, prom_cpu; + ihandle prom_cpu; phandle cpu_pkg; unsigned long offset = reloc_offset(); long l; @@ -1239,36 +1431,27 @@ prom_init(unsigned long r3, unsigned lon RELOC(systemcfg) = _systemcfg = (struct systemcfg *)(SYSTEMCFG_VIRT_ADDR - offset); RELOC(naca) = (struct naca_struct *)(NACA_VIRT_ADDR - offset); - /* Default machine type. */ - _systemcfg->platform = PLATFORM_PSERIES; + /* Init interface to Open Firmware and pickup bi-recs */ + prom_init_client_services(pp); -#if 0 - /* Reset klimit to take into account the embedded system map */ - if (RELOC(embedded_sysmap_end)) - RELOC(klimit) = __va(PAGE_ALIGN(RELOC(embedded_sysmap_end))); -#endif + /* Init prom stdout device */ + prom_init_stdout(); - /* Get a handle to the prom entry point before anything else */ - _prom->entry = pp; + /* check out if we have bi_recs */ _prom->bi_recs = prom_bi_rec_verify((struct bi_record *)r6); - if ( _prom->bi_recs != NULL ) { - RELOC(klimit) = PTRUNRELOC((unsigned long)_prom->bi_recs + _prom->bi_recs->data[1]); - } - - /* First get a handle for the stdout device */ - _prom->chosen = (ihandle)call_prom(RELOC("finddevice"), 1, 1, - RELOC("/chosen")); + if ( _prom->bi_recs != NULL ) + RELOC(klimit) = PTRUNRELOC((unsigned long)_prom->bi_recs + + _prom->bi_recs->data[1]); - if ((long)_prom->chosen <= 0) - prom_panic(RELOC("cannot find chosen")); /* msg won't be printed :( */ + /* Default machine type. */ + _systemcfg->platform = prom_find_machine_type(); - if ((long)call_prom(RELOC("getprop"), 4, 1, _prom->chosen, - RELOC("stdout"), &getprop_rval, - sizeof(getprop_rval)) <= 0) - prom_panic(RELOC("cannot find stdout")); - _prom->stdout = (ihandle)(unsigned long)getprop_rval; + /* On pSeries, copy the CPU hold code */ + if (_systemcfg->platform == PLATFORM_PSERIES) + copy_and_flush(0, KERNELBASE - offset, 0x100, 0); + /* Start storing things at klimit */ mem = RELOC(klimit) - offset; /* Get the full OF pathname of the stdout device */ @@ -1279,13 +1462,10 @@ prom_init(unsigned long r3, unsigned lon mem += strlen(p) + 1; getprop_rval = 1; - prom_root = (ihandle)call_prom(RELOC("finddevice"), 1, 1, RELOC("/")); - if (prom_root != (ihandle)-1) { - call_prom(RELOC("getprop"), 4, 1, - prom_root, RELOC("#size-cells"), - &getprop_rval, sizeof(getprop_rval)); - } - _prom->encode_phys_size = (getprop_rval==1) ? 32 : 64; + call_prom(RELOC("getprop"), 4, 1, + _prom->root, RELOC("#size-cells"), + &getprop_rval, sizeof(getprop_rval)); + _prom->encode_phys_size = (getprop_rval == 1) ? 32 : 64; /* Determine which cpu is actually running right _now_ */ if ((long)call_prom(RELOC("getprop"), 4, 1, _prom->chosen, @@ -1324,13 +1504,24 @@ prom_init(unsigned long r3, unsigned lon mem = DOUBLEWORD_ALIGN(mem + strlen(d) + 1); } + RELOC(cmd_line[0]) = 0; + if ((long)_prom->chosen > 0) { + call_prom(RELOC("getprop"), 4, 1, _prom->chosen, + RELOC("bootargs"), p, sizeof(cmd_line)); + if (p != NULL && p[0] != 0) + strncpy(RELOC(cmd_line), p, sizeof(cmd_line)); + } + RELOC(cmd_line[sizeof(cmd_line) - 1]) = 0; + + mem = prom_initialize_lmb(mem); mem = prom_bi_rec_reserve(mem); mem = check_display(mem); - prom_instantiate_rtas(); + if (_systemcfg->platform != PLATFORM_POWERMAC) + prom_instantiate_rtas(); /* Initialize some system info into the Naca early... */ mem = prom_initialize_naca(mem); @@ -1341,7 +1532,7 @@ prom_init(unsigned long r3, unsigned lon * following, regardless of whether we have an SMP * kernel or not. */ - prom_hold_cpus(mem); + prom_hold_cpus(mem); #ifdef DEBUG_PROM prom_print(RELOC("copying OF device tree...\n")); @@ -1355,6 +1546,13 @@ prom_init(unsigned long r3, unsigned lon if (_systemcfg->platform == PLATFORM_PSERIES) prom_initialize_tce_table(); +#ifdef CONFIG_BOOTX_TEXT + if(_prom->disp_node) { + prom_print(RELOC("Setting up bi display...\n")); + setup_disp_fake_bi(_prom->disp_node); + } +#endif /* CONFIG_BOOTX_TEXT */ + prom_print(RELOC("Calling quiesce ...\n")); call_prom(RELOC("quiesce"), 0, 0); phys = KERNELBASE - offset; @@ -1471,6 +1669,10 @@ check_display(unsigned long mem) RELOC(prom_display_paths[i]) = PTRUNRELOC(path); if (RELOC(prom_num_displays) >= FB_MAX) break; + /* XXX Temporary workaround: only open the first display so we don't + * lose debug output + */ + break; } return DOUBLEWORD_ALIGN(mem); } @@ -1684,9 +1886,9 @@ finish_node(struct device_node *np, unsi np->type = get_property(np, "device_type", 0); /* get the device addresses and interrupts */ - if (ifunc != NULL) { - mem_start = ifunc(np, mem_start, naddrc, nsizec); - } + if (ifunc != NULL) + mem_start = ifunc(np, mem_start, naddrc, nsizec); + mem_start = finish_node_interrupts(np, mem_start); /* Look for #address-cells and #size-cells properties. */ @@ -1700,7 +1902,6 @@ finish_node(struct device_node *np, unsi /* the f50 sets the name to 'display' and 'compatible' to what we * expect for the name -- Cort */ - ifunc = NULL; if (!strcmp(np->name, "display")) np->name = get_property(np, "compatible", 0); @@ -1710,8 +1911,19 @@ finish_node(struct device_node *np, unsi ifunc = NULL; else if (!strcmp(np->type, "pci") || !strcmp(np->type, "vci")) ifunc = interpret_pci_props; + else if (!strcmp(np->type, "dbdma")) + ifunc = interpret_dbdma_props; + else if (!strcmp(np->type, "mac-io") || ifunc == interpret_macio_props) + ifunc = interpret_macio_props; else if (!strcmp(np->type, "isa")) ifunc = interpret_isa_props; + else if (!strcmp(np->name, "uni-n") || !strcmp(np->name, "u3")) + ifunc = interpret_root_props; + else if (!((ifunc == interpret_dbdma_props + || ifunc == interpret_macio_props) + && (!strcmp(np->type, "escc") + || !strcmp(np->type, "media-bay")))) + ifunc = NULL; for (child = np->child; child != NULL; child = child->sibling) mem_start = finish_node(child, mem_start, ifunc, @@ -1888,6 +2100,12 @@ finish_node_interrupts(struct device_nod if (n <= 0) continue; np->intrs[i].line = openpic_to_irq(virt_irq_create_mapping(irq[0])); + /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */ + if (systemcfg->platform == PLATFORM_POWERMAC && ic && ic->parent) { + char *name = get_property(ic->parent, "name", NULL); + if (name && !strcmp(name, "u3")) + np->intrs[i].line += 128; + } if (n > 1) np->intrs[i].sense = irq[1]; if (n > 2) { @@ -1960,6 +2178,78 @@ interpret_pci_props(struct device_node * } static unsigned long __init +interpret_dbdma_props(struct device_node *np, unsigned long mem_start, + int naddrc, int nsizec) +{ + struct reg_property32 *rp; + struct address_range *adr; + unsigned long base_address; + int i, l; + struct device_node *db; + + base_address = 0; + for (db = np->parent; db != NULL; db = db->parent) { + if (!strcmp(db->type, "dbdma") && db->n_addrs != 0) { + base_address = db->addrs[0].address; + break; + } + } + + rp = (struct reg_property32 *) get_property(np, "reg", &l); + if (rp != 0 && l >= sizeof(struct reg_property32)) { + i = 0; + adr = (struct address_range *) mem_start; + while ((l -= sizeof(struct reg_property32)) >= 0) { + adr[i].space = 2; + adr[i].address = rp[i].address + base_address; + adr[i].size = rp[i].size; + ++i; + } + np->addrs = adr; + np->n_addrs = i; + mem_start += i * sizeof(struct address_range); + } + + return mem_start; +} + +static unsigned long __init +interpret_macio_props(struct device_node *np, unsigned long mem_start, + int naddrc, int nsizec) +{ + struct reg_property32 *rp; + struct address_range *adr; + unsigned long base_address; + int i, l; + struct device_node *db; + + base_address = 0; + for (db = np->parent; db != NULL; db = db->parent) { + if (!strcmp(db->type, "mac-io") && db->n_addrs != 0) { + base_address = db->addrs[0].address; + break; + } + } + + rp = (struct reg_property32 *) get_property(np, "reg", &l); + if (rp != 0 && l >= sizeof(struct reg_property32)) { + i = 0; + adr = (struct address_range *) mem_start; + while ((l -= sizeof(struct reg_property32)) >= 0) { + adr[i].space = 2; + adr[i].address = rp[i].address + base_address; + adr[i].size = rp[i].size; + ++i; + } + np->addrs = adr; + np->n_addrs = i; + mem_start += i * sizeof(struct address_range); + } + + return mem_start; +} + +static unsigned long __init interpret_isa_props(struct device_node *np, unsigned long mem_start, int naddrc, int nsizec) { @@ -2207,6 +2497,7 @@ struct device_node *of_find_node_by_name read_unlock(&devtree_lock); return np; } +EXPORT_SYMBOL(of_find_node_by_name); /** * of_find_node_by_type - Find a node by its "device_type" property @@ -2270,6 +2561,7 @@ struct device_node *of_find_compatible_n read_unlock(&devtree_lock); return np; } +EXPORT_SYMBOL(of_find_compatible_node); /** * of_find_node_by_path - Find a node matching a full OF path @@ -2290,6 +2582,7 @@ struct device_node *of_find_node_by_path read_unlock(&devtree_lock); return np; } +EXPORT_SYMBOL(of_find_node_by_path); /** * of_find_all_nodes - Get next node in global list @@ -2313,6 +2606,7 @@ struct device_node *of_find_all_nodes(st read_unlock(&devtree_lock); return np; } +EXPORT_SYMBOL(of_find_all_nodes); /** * of_get_parent - Get a node's parent if any @@ -2333,6 +2627,7 @@ struct device_node *of_get_parent(const read_unlock(&devtree_lock); return np; } +EXPORT_SYMBOL(of_get_parent); /** * of_get_next_child - Iterate a node childs @@ -2357,6 +2652,7 @@ struct device_node *of_get_next_child(co read_unlock(&devtree_lock); return next; } +EXPORT_SYMBOL(of_get_next_child); /** * of_node_get - Increment refcount of a node @@ -2373,6 +2669,7 @@ struct device_node *of_node_get(struct d } return NULL; } +EXPORT_SYMBOL(of_node_get); /** * of_node_put - Decrement refcount of a node @@ -2396,6 +2693,7 @@ void of_node_put(struct device_node *nod else atomic_dec(&node->_users); } +EXPORT_SYMBOL(of_node_put); /** * of_node_cleanup - release a dynamically allocated node @@ -2653,6 +2951,12 @@ static int of_finish_dynamic_node(struct goto out; } + /* We don't support that function on PowerMac, at least + * not yet + */ + if (systemcfg->platform == PLATFORM_POWERMAC) + return -ENODEV; + /* do the work of interpret_pci_props */ if (parent->type && !strcmp(parent->type, "pci")) { struct address_range *adr; --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/pSeries_nvram.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,150 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * /dev/nvram driver for PPC64 + * + * This perhaps should live in drivers/char + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nvram_size; +static unsigned int nvram_fetch, nvram_store; +static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ +static spinlock_t nvram_lock = SPIN_LOCK_UNLOCKED; + + +static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len, done; + unsigned long flags; + char *p = buf; + + if (nvram_size == 0 || nvram_fetch) + return -ENODEV; + + if (*index >= nvram_size) + return 0; + + i = *index; + if (i + count > nvram_size) + count = nvram_size - i; + + spin_lock_irqsave(&nvram_lock, flags); + + for (; count != 0; count -= len) { + len = count; + if (len > NVRW_CNT) + len = NVRW_CNT; + + if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf), + len) != 0) || len != done) { + spin_unlock_irqrestore(&nvram_lock, flags); + return -EIO; + } + + memcpy(p, nvram_buf, len); + + p += len; + i += len; + } + + spin_unlock_irqrestore(&nvram_lock, flags); + + *index = i; + return p - buf; +} + +static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len, done; + unsigned long flags; + const char *p = buf; + + if (nvram_size == 0 || nvram_store) + return -ENODEV; + + if (*index >= nvram_size) + return 0; + + i = *index; + if (i + count > nvram_size) + count = nvram_size - i; + + spin_lock_irqsave(&nvram_lock, flags); + + for (; count != 0; count -= len) { + len = count; + if (len > NVRW_CNT) + len = NVRW_CNT; + + memcpy(nvram_buf, p, len); + + if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf), + len) != 0) || len != done) { + spin_unlock_irqrestore(&nvram_lock, flags); + return -EIO; + } + + p += len; + i += len; + } + spin_unlock_irqrestore(&nvram_lock, flags); + + *index = i; + return p - buf; +} + +static ssize_t pSeries_nvram_get_size(void) +{ + return nvram_size ? nvram_size : -ENODEV; +} + +int __init pSeries_nvram_init(void) +{ + struct device_node *nvram; + unsigned int *nbytes_p, proplen; + + nvram = of_find_node_by_type(NULL, "nvram"); + if (nvram == NULL) + return -ENODEV; + + nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen); + if (nbytes_p == NULL || proplen != sizeof(unsigned int)) + return -EIO; + + nvram_size = *nbytes_p; + + nvram_fetch = rtas_token("nvram-fetch"); + nvram_store = rtas_token("nvram-store"); + printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size); + of_node_put(nvram); + + ppc_md.nvram_read = pSeries_nvram_read; + ppc_md.nvram_write = pSeries_nvram_write; + ppc_md.nvram_size = pSeries_nvram_get_size; + + return 0; +} --- linux-2.6.3-rc2/arch/ppc64/kernel/pSeries_pci.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc64/kernel/pSeries_pci.c 2004-02-12 00:39:25.000000000 -0800 @@ -687,7 +687,7 @@ static void phbs_fixup_io(void) extern void chrp_request_regions(void); -void __init pcibios_final_fixup(void) +void __init pSeries_final_fixup(void) { struct pci_dev *dev = NULL; --- linux-2.6.3-rc2/arch/ppc64/kernel/rtasd.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/ppc64/kernel/rtasd.c 2004-02-12 00:40:54.000000000 -0800 @@ -336,8 +336,6 @@ static int get_eventscan_parms(void) return 0; } -extern long sys_sched_get_priority_max(int policy); - static int rtasd(void *unused) { unsigned int err_type; --- linux-2.6.3-rc2/arch/ppc64/kernel/rtas-proc.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc64/kernel/rtas-proc.c 2004-02-12 00:40:49.000000000 -0800 @@ -287,9 +287,9 @@ static ssize_t ppc_rtas_poweron_read(str char stkbuf[40]; /* its small, its on stack */ int n, sn; if (power_on_time == 0) - n = snprintf(stkbuf, 40, "Power on time not set\n"); + n = scnprintf(stkbuf,sizeof(stkbuf),"Power on time not set\n"); else - n = snprintf(stkbuf, 40, "%lu\n", power_on_time); + n = scnprintf(stkbuf,sizeof(stkbuf),"%lu\n",power_on_time); sn = strlen (stkbuf) +1; if (*ppos >= sn) @@ -410,9 +410,10 @@ static ssize_t ppc_rtas_clock_read(struc if (error != 0){ printk(KERN_WARNING "error: reading the clock returned: %s\n", ppc_rtas_process_error(error)); - n = snprintf (stkbuf, 40, "0"); + n = scnprintf (stkbuf, sizeof(stkbuf), "0"); } else { - n = snprintf (stkbuf, 40, "%lu\n", mktime(year, mon, day, hour, min, sec)); + n = scnprintf (stkbuf, sizeof(stkbuf), "%lu\n", + mktime(year, mon, day, hour, min, sec)); } kfree(ret); @@ -819,7 +820,7 @@ int get_location_code(struct individual_ n += check_location_string(ret, buffer + n); n += sprintf ( buffer+n, " "); /* see how many characters we have printed */ - snprintf ( t, 50, "%s ", ret); + scnprintf(t, sizeof(t), "%s ", ret); pos += strlen(t); if (pos >= llen) pos=0; @@ -863,7 +864,7 @@ static ssize_t ppc_rtas_tone_freq_read(s int n, sn; char stkbuf[40]; /* its small, its on stack */ - n = snprintf(stkbuf, 40, "%lu\n", rtas_tone_frequency); + n = scnprintf(stkbuf, 40, "%lu\n", rtas_tone_frequency); sn = strlen (stkbuf) +1; if (*ppos >= sn) @@ -917,7 +918,7 @@ static ssize_t ppc_rtas_tone_volume_read int n, sn; char stkbuf[40]; /* its small, its on stack */ - n = snprintf(stkbuf, 40, "%lu\n", rtas_tone_volume); + n = scnprintf(stkbuf, 40, "%lu\n", rtas_tone_volume); sn = strlen (stkbuf) +1; if (*ppos >= sn) --- linux-2.6.3-rc2/arch/ppc64/kernel/setup.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc64/kernel/setup.c 2004-02-12 00:39:25.000000000 -0800 @@ -40,6 +40,8 @@ #include #include #include +#include +#include extern unsigned long klimit; /* extern void *stab; */ @@ -54,10 +56,17 @@ extern void chrp_init(unsigned long r3, unsigned long r6, unsigned long r7); +extern void pmac_init(unsigned long r3, + unsigned long r4, + unsigned long r5, + unsigned long r6, + unsigned long r7); + extern void iSeries_init( void ); extern void iSeries_init_early( void ); extern void pSeries_init_early( void ); extern void pSeriesLP_init_early(void); +extern void pmac_init_early(void); extern void mm_init_ppc64( void ); extern void pseries_secondary_smp_init(unsigned long); extern int idle_setup(void); @@ -68,6 +77,8 @@ unsigned long decr_overclock_proc0 = 1; unsigned long decr_overclock_set = 0; unsigned long decr_overclock_proc0_set = 0; +int powersave_nap; + #ifdef CONFIG_XMON extern void xmon_map_scc(void); #endif @@ -188,9 +199,25 @@ void setup_system(unsigned long r3, unsi #endif parse_bootinfo(); break; +#endif /* CONFIG_PPC_PSERIES */ +#ifdef CONFIG_PPC_PMAC + case PLATFORM_POWERMAC: + pmac_init_early(); +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = initrd_end = 0; #endif + parse_bootinfo(); +#endif /* CONFIG_PPC_PMAC */ } +#ifdef CONFIG_BOOTX_TEXT + map_boot_text(); + if (systemcfg->platform == PLATFORM_POWERMAC) { + early_console_initialized = 1; + register_console(&udbg_console); + } +#endif /* CONFIG_BOOTX_TEXT */ + #ifdef CONFIG_PPC_PSERIES if (systemcfg->platform & PLATFORM_PSERIES) { early_console_initialized = 1; @@ -206,14 +233,23 @@ void setup_system(unsigned long r3, unsi rtas_call(rtas_token("start-cpu"), 3, 1, (void *)&ret, get_hard_smp_processor_id(i), - *((unsigned long *)pseries_secondary_smp_init), i); + *((unsigned long *)pseries_secondary_smp_init), + i); cpu_set(i, cpu_possible_map); systemcfg->processorCount++; } } -#endif } -#endif +#endif /* CONFIG_SMP */ +#endif /* CONFIG_PPC_PSERIES */ + +#ifdef CONFIG_PPC_PMAC + if (systemcfg->platform == PLATFORM_POWERMAC) { + finish_device_tree(); + pmac_init(r3, r4, r5, r6, r7); + } +#endif /* CONFIG_PPC_PMAC */ + /* Finish initializing the hash table (do the dynamic * patching for the fast-path hashtable.S code) */ @@ -226,7 +262,7 @@ void setup_system(unsigned long r3, unsi printk("naca->pftSize = 0x%lx\n", naca->pftSize); printk("naca->debug_switch = 0x%lx\n", naca->debug_switch); printk("naca->interrupt_controller = 0x%ld\n", naca->interrupt_controller); - printk("systemcfg = 0x%p\n", systemcfg); + printk("systemcfg = 0x%p\n", systemcfg); printk("systemcfg->processorCount = 0x%lx\n", systemcfg->processorCount); printk("systemcfg->physicalMemorySize = 0x%lx\n", systemcfg->physicalMemorySize); printk("systemcfg->dCacheL1LineSize = 0x%x\n", systemcfg->dCacheL1LineSize); @@ -261,6 +297,8 @@ void setup_system(unsigned long r3, unsi void machine_restart(char *cmd) { + if (ppc_md.nvram_sync) + ppc_md.nvram_sync(); ppc_md.restart(cmd); } @@ -268,6 +306,8 @@ EXPORT_SYMBOL(machine_restart); void machine_power_off(void) { + if (ppc_md.nvram_sync) + ppc_md.nvram_sync(); ppc_md.power_off(); } @@ -275,6 +315,8 @@ EXPORT_SYMBOL(machine_power_off); void machine_halt(void) { + if (ppc_md.nvram_sync) + ppc_md.nvram_sync(); ppc_md.halt(); } --- linux-2.6.3-rc2/arch/ppc64/kernel/smp.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc64/kernel/smp.c 2004-02-12 00:39:25.000000000 -0800 @@ -62,7 +62,7 @@ cpumask_t cpu_present_at_boot = CPU_MASK EXPORT_SYMBOL(cpu_online_map); EXPORT_SYMBOL(cpu_possible_map); -static struct smp_ops_t *smp_ops; +struct smp_ops_t *smp_ops; static volatile unsigned int cpu_callin_map[NR_CPUS]; @@ -76,13 +76,8 @@ extern long register_vpa(unsigned long f #define smp_message_pass(t,m,d,w) smp_ops->message_pass((t),(m),(d),(w)) -static inline void set_tb(unsigned int upper, unsigned int lower) -{ - mttbl(0); - mttbu(upper); - mttbl(lower); -} - +/* Low level assembly function used to backup CPU 0 state */ +extern void __save_cpu_setup(void); #ifdef CONFIG_PPC_ISERIES static unsigned long iSeries_smp_message[NR_CPUS]; @@ -182,21 +177,23 @@ static void __devinit smp_iSeries_setup_ { } +static struct smp_ops_t iSeries_smp_ops = { + .message_pass = smp_iSeries_message_pass, + .probe = smp_iSeries_probe, + .kick_cpu = smp_iSeries_kick_cpu, + .setup_cpu = smp_iSeries_setup_cpu, +}; + /* This is called very early. */ void __init smp_init_iSeries(void) { - smp_ops = &ppc_md.smp_ops; - smp_ops->message_pass = smp_iSeries_message_pass; - smp_ops->probe = smp_iSeries_probe; - smp_ops->kick_cpu = smp_iSeries_kick_cpu; - smp_ops->setup_cpu = smp_iSeries_setup_cpu; + smp_ops = &iSeries_smp_ops; systemcfg->processorCount = smp_iSeries_numProcs(); } #endif #ifdef CONFIG_PPC_PSERIES -static void -smp_openpic_message_pass(int target, int msg, unsigned long data, int wait) +void smp_openpic_message_pass(int target, int msg, unsigned long data, int wait) { /* make sure we're sending something that translates to an IPI */ if ( msg > 0x3 ){ @@ -240,8 +237,7 @@ static void __devinit smp_openpic_setup_ do_openpic_setup_cpu(); } -static void -smp_kick_cpu(int nr) +static void smp_pSeries_kick_cpu(int nr) { /* Verify we have a Paca for processor nr */ if ( ( nr <= 0 ) || @@ -290,8 +286,7 @@ void vpa_init(int cpu) register_vpa(flags, cpu, __pa((unsigned long)&(paca[cpu].xLpPaca))); } -static void -smp_xics_message_pass(int target, int msg, unsigned long data, int wait) +static void smp_xics_message_pass(int target, int msg, unsigned long data, int wait) { int i; @@ -358,27 +353,34 @@ static void __devinit pSeries_take_timeb spin_unlock(&timebase_lock); } +static struct smp_ops_t pSeries_openpic_smp_ops = { + .message_pass = smp_openpic_message_pass, + .probe = smp_openpic_probe, + .kick_cpu = smp_pSeries_kick_cpu, + .setup_cpu = smp_openpic_setup_cpu, +}; + +static struct smp_ops_t pSeries_xics_smp_ops = { + .message_pass = smp_xics_message_pass, + .probe = smp_xics_probe, + .kick_cpu = smp_pSeries_kick_cpu, + .setup_cpu = smp_xics_setup_cpu, +}; + /* This is called very early */ void __init smp_init_pSeries(void) { - smp_ops = &ppc_md.smp_ops; - if (naca->interrupt_controller == IC_OPEN_PIC) { - smp_ops->message_pass = smp_openpic_message_pass; - smp_ops->probe = smp_openpic_probe; - smp_ops->setup_cpu = smp_openpic_setup_cpu; - } else { - smp_ops->message_pass = smp_xics_message_pass; - smp_ops->probe = smp_xics_probe; - smp_ops->setup_cpu = smp_xics_setup_cpu; - } + if (naca->interrupt_controller == IC_OPEN_PIC) + smp_ops = &pSeries_openpic_smp_ops; + else + smp_ops = &pSeries_xics_smp_ops; + /* Non-lpar has additional take/give timebase */ if (systemcfg->platform == PLATFORM_PSERIES) { smp_ops->give_timebase = pSeries_give_timebase; smp_ops->take_timebase = pSeries_take_timebase; } - - smp_ops->kick_cpu = smp_kick_cpu; } #endif @@ -613,6 +615,10 @@ void __init smp_prepare_cpus(unsigned in #endif max_cpus = smp_ops->probe(); + + /* Backup CPU 0 state if necessary */ + __save_cpu_setup(); + smp_space_timers(max_cpus); } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc64/kernel/smp-tbsync.c 2004-02-12 00:39:25.000000000 -0800 @@ -0,0 +1,179 @@ +/* + * Smp timebase synchronization for ppc. + * + * Copyright (C) 2003 Samuel Rydh (samuel@ibrium.se) + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NUM_ITER 300 + +enum { + kExit=0, kSetAndTest, kTest +}; + +static struct { + volatile long tb; + volatile long mark; + volatile int cmd; + volatile int handshake; + int filler[3]; + + volatile int ack; + int filler2[7]; + + volatile int race_result; +} *tbsync; + +static volatile int running; + +static void __devinit +enter_contest( long mark, long add ) +{ + while( (long)(mftb() - mark) < 0 ) + tbsync->race_result = add; +} + +void __devinit +smp_generic_take_timebase( void ) +{ + int cmd; + long tb; + + local_irq_disable(); + while( !running ) + ; + rmb(); + + for( ;; ) { + tbsync->ack = 1; + while( !tbsync->handshake ) + ; + rmb(); + + cmd = tbsync->cmd; + tb = tbsync->tb; + tbsync->ack = 0; + if( cmd == kExit ) + return; + + if( cmd == kSetAndTest ) { + while( tbsync->handshake ) + ; + asm volatile ("mttbl %0" :: "r" (tb & 0xfffffffful) ); + asm volatile ("mttbu %0" :: "r" (tb >> 32) ); + } else { + while( tbsync->handshake ) + ; + } + enter_contest( tbsync->mark, -1 ); + } + local_irq_enable(); +} + +static int __devinit +start_contest( int cmd, long offset, long num ) +{ + int i, score=0; + long tb, mark; + + tbsync->cmd = cmd; + + local_irq_disable(); + for( i=-3; itb = tb + offset; + tbsync->mark = mark = tb + 400; + + wmb(); + + tbsync->handshake = 1; + while( tbsync->ack ) + ; + + while( (long)(mftb() - tb) <= 0 ) + ; + tbsync->handshake = 0; + enter_contest( mark, 1 ); + + while( !tbsync->ack ) + ; + + if ((tbsync->tb ^ (long)mftb()) & 0x8000000000000000ul) + continue; + if( i++ > 0 ) + score += tbsync->race_result; + } + local_irq_enable(); + return score; +} + +void __devinit +smp_generic_give_timebase( void ) +{ + int i, score, score2, old, min=0, max=5000, offset=1000; + + printk("Synchronizing timebase\n"); + + /* if this fails then this kernel won't work anyway... */ + tbsync = kmalloc( sizeof(*tbsync), GFP_KERNEL ); + memset( tbsync, 0, sizeof(*tbsync) ); + mb(); + running = 1; + + while( !tbsync->ack ) + ; + + printk("Got ack\n"); + + /* binary search */ + for( old=-1 ; old != offset ; offset=(min+max)/2 ) { + score = start_contest( kSetAndTest, offset, NUM_ITER ); + + printk("score %d, offset %d\n", score, offset ); + + if( score > 0 ) + max = offset; + else + min = offset; + old = offset; + } + score = start_contest( kSetAndTest, min, NUM_ITER ); + score2 = start_contest( kSetAndTest, max, NUM_ITER ); + + printk( "Min %d (score %d), Max %d (score %d)\n", min, score, max, score2 ); + score = abs( score ); + score2 = abs( score2 ); + offset = (score < score2) ? min : max; + + /* guard against inaccurate mttb */ + for( i=0; i<10; i++ ) { + start_contest( kSetAndTest, offset, NUM_ITER/10 ); + + if( (score2=start_contest(kTest, offset, NUM_ITER)) < 0 ) + score2 = -score2; + if( score2 <= score || score2 < 20 ) + break; + } + printk("Final offset: %d (%d/%d)\n", offset, score2, NUM_ITER ); + + /* exiting */ + tbsync->cmd = kExit; + wmb(); + tbsync->handshake = 1; + while( tbsync->ack ) + ; + tbsync->handshake = 0; + kfree( tbsync ); + tbsync = NULL; + running = 0; +} --- linux-2.6.3-rc2/arch/ppc64/kernel/sys_ppc32.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/ppc64/kernel/sys_ppc32.c 2004-02-12 00:40:55.000000000 -0800 @@ -54,6 +54,8 @@ #include #include #include +#include +#include #include #include #include @@ -65,6 +67,7 @@ #include #include #include +#include #include @@ -76,6 +79,8 @@ #include #include +#include "pci.h" + typedef ssize_t (*io_fn_t)(struct file *, char *, size_t, loff_t *); typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); @@ -776,8 +781,6 @@ int cp_compat_stat(struct kstat *stat, s return err; } -extern asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2); - /* Note: it is necessary to treat option as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -1153,8 +1156,6 @@ struct sysinfo32 { char _f[20-2*sizeof(int)-sizeof(int)]; }; -extern asmlinkage long sys_sysinfo(struct sysinfo *info); - asmlinkage long sys32_sysinfo(struct sysinfo32 *info) { struct sysinfo s; @@ -1866,8 +1867,6 @@ asmlinkage long sys32_ipc(u32 call, u32 return err; } -extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t* offset, size_t count); - /* Note: it is necessary to treat out_fd and in_fd as unsigned ints, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -1892,8 +1891,6 @@ asmlinkage long sys32_sendfile(u32 out_f return ret; } -extern asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t *offset, size_t count); - asmlinkage int sys32_sendfile64(int out_fd, int in_fd, compat_loff_t *offset, s32 count) { mm_segment_t old_fs = get_fs(); @@ -2156,9 +2153,6 @@ void start_thread32(struct pt_regs* regs #endif /* CONFIG_ALTIVEC */ } -extern asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); - /* Note: it is necessary to treat option as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2173,14 +2167,12 @@ asmlinkage long sys32_prctl(u32 option, (unsigned long) arg5); } -extern asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval); - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) * and the register representation of a signed int (msr in 64-bit mode) is performed. */ -asmlinkage int sys32_sched_rr_get_interval(u32 pid, struct compat_timespec *interval) +asmlinkage long sys32_sched_rr_get_interval(u32 pid, struct compat_timespec *interval) { struct timespec t; int ret; @@ -2194,9 +2186,6 @@ asmlinkage int sys32_sched_rr_get_interv return ret; } -extern asmlinkage int sys_pciconfig_read(unsigned long bus, unsigned long dfn, unsigned long off, - unsigned long len, unsigned char *buf); - asmlinkage int sys32_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf) { return sys_pciconfig_read((unsigned long) bus, @@ -2206,12 +2195,6 @@ asmlinkage int sys32_pciconfig_read(u32 (unsigned char *)AA(ubuf)); } - - - -extern asmlinkage int sys_pciconfig_write(unsigned long bus, unsigned long dfn, unsigned long off, - unsigned long len, unsigned char *buf); - asmlinkage int sys32_pciconfig_write(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf) { return sys_pciconfig_write((unsigned long) bus, @@ -2221,7 +2204,63 @@ asmlinkage int sys32_pciconfig_write(u32 (unsigned char *)AA(ubuf)); } -extern asmlinkage int sys_newuname(struct new_utsname * name); +#define IOBASE_BRIDGE_NUMBER 0 +#define IOBASE_MEMORY 1 +#define IOBASE_IO 2 +#define IOBASE_ISA_IO 3 +#define IOBASE_ISA_MEM 4 + +asmlinkage int sys32_pciconfig_iobase(u32 which, u32 in_bus, u32 in_devfn) +{ + struct pci_controller* hose; + struct list_head *ln; + struct pci_bus *bus = NULL; + struct device_node *hose_node; + + /* Argh ! Please forgive me for that hack, but that's the + * simplest way to get existing XFree to not lockup on some + * G5 machines... So when something asks for bus 0 io base + * (bus 0 is HT root), we return the AGP one instead. + */ +#ifdef CONFIG_PPC_PMAC + if (systemcfg->platform == PLATFORM_POWERMAC && + machine_is_compatible("MacRISC4")) + if (in_bus == 0) + in_bus = 0xf0; +#endif /* CONFIG_PPC_PMAC */ + + /* That syscall isn't quite compatible with PCI domains, but it's + * used on pre-domains setup. We return the first match + */ + + for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) { + bus = pci_bus_b(ln); + if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate)) + break; + bus = NULL; + } + if (bus == NULL || bus->sysdata == NULL) + return -ENODEV; + + hose_node = (struct device_node *)bus->sysdata; + hose = hose_node->phb; + + switch (which) { + case IOBASE_BRIDGE_NUMBER: + return (long)hose->first_busno; + case IOBASE_MEMORY: + return (long)hose->pci_mem_offset; + case IOBASE_IO: + return (long)hose->io_base_phys; + case IOBASE_ISA_IO: + return (long)isa_io_base; + case IOBASE_ISA_MEM: + return -EINVAL; + } + + return -EOPNOTSUPP; +} + asmlinkage int ppc64_newuname(struct new_utsname * name) { @@ -2235,8 +2274,6 @@ asmlinkage int ppc64_newuname(struct new return errno; } -extern asmlinkage long sys_personality(unsigned long); - asmlinkage int ppc64_personality(unsigned long personality) { int ret; @@ -2250,8 +2287,6 @@ asmlinkage int ppc64_personality(unsigne -extern asmlinkage long sys_access(const char * filename, int mode); - /* Note: it is necessary to treat mode as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2263,8 +2298,6 @@ asmlinkage long sys32_access(const char } -extern asmlinkage long sys_creat(const char * pathname, int mode); - /* Note: it is necessary to treat mode as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2276,8 +2309,6 @@ asmlinkage long sys32_creat(const char * } -extern asmlinkage long sys_waitpid(pid_t pid, unsigned int * stat_addr, int options); - /* Note: it is necessary to treat pid and options as unsigned ints, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2289,8 +2320,6 @@ asmlinkage long sys32_waitpid(u32 pid, u } -extern asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist); - /* Note: it is necessary to treat gidsetsize as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2302,8 +2331,6 @@ asmlinkage long sys32_getgroups(u32 gids } -extern asmlinkage long sys_getpgid(pid_t pid); - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2315,8 +2342,6 @@ asmlinkage long sys32_getpgid(u32 pid) } -extern asmlinkage long sys_getpriority(int which, int who); - /* Note: it is necessary to treat which and who as unsigned ints, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2328,8 +2353,6 @@ asmlinkage long sys32_getpriority(u32 wh } -extern asmlinkage long sys_getsid(pid_t pid); - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2340,7 +2363,6 @@ asmlinkage long sys32_getsid(u32 pid) return sys_getsid((int)pid); } -extern asmlinkage long sys_kill(int pid, int sig); /* Note: it is necessary to treat pid and sig as unsigned ints, * with the corresponding cast to a signed int to insure that the @@ -2353,8 +2375,6 @@ asmlinkage long sys32_kill(u32 pid, u32 } -extern asmlinkage long sys_mkdir(const char * pathname, int mode); - /* Note: it is necessary to treat mode as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2365,16 +2385,12 @@ asmlinkage long sys32_mkdir(const char * return sys_mkdir(pathname, (int)mode); } -extern asmlinkage long sys_nice(int increment); - long sys32_nice(u32 increment) { /* sign extend increment */ return sys_nice((int)increment); } -extern off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin); - off_t ppc32_lseek(unsigned int fd, u32 offset, unsigned int origin) { /* sign extend n */ @@ -2412,8 +2428,6 @@ out_error: goto out; } -extern asmlinkage long sys_readlink(const char * path, char * buf, int bufsiz); - /* Note: it is necessary to treat bufsiz as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2424,8 +2438,6 @@ asmlinkage long sys32_readlink(const cha return sys_readlink(path, buf, (int)bufsiz); } -extern asmlinkage long sys_sched_get_priority_max(int policy); - /* Note: it is necessary to treat option as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2437,8 +2449,6 @@ asmlinkage long sys32_sched_get_priority } -extern asmlinkage long sys_sched_get_priority_min(int policy); - /* Note: it is necessary to treat policy as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2450,8 +2460,6 @@ asmlinkage long sys32_sched_get_priority } -extern asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param); - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2463,8 +2471,6 @@ asmlinkage long sys32_sched_getparam(u32 } -extern asmlinkage long sys_sched_getscheduler(pid_t pid); - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2476,8 +2482,6 @@ asmlinkage long sys32_sched_getscheduler } -extern asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param); - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2489,8 +2493,6 @@ asmlinkage long sys32_sched_setparam(u32 } -extern asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, struct sched_param *param); - /* Note: it is necessary to treat pid and policy as unsigned ints, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2502,8 +2504,6 @@ asmlinkage long sys32_sched_setscheduler } -extern asmlinkage long sys_setdomainname(char *name, int len); - /* Note: it is necessary to treat len as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2515,8 +2515,6 @@ asmlinkage long sys32_setdomainname(char } -extern asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist); - /* Note: it is necessary to treat gidsetsize as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2528,8 +2526,6 @@ asmlinkage long sys32_setgroups(u32 gids } -extern asmlinkage long sys_sethostname(char *name, int len); - asmlinkage long sys32_sethostname(char *name, u32 len) { /* sign extend len */ @@ -2537,8 +2533,6 @@ asmlinkage long sys32_sethostname(char * } -extern asmlinkage long sys_setpgid(pid_t pid, pid_t pgid); - /* Note: it is necessary to treat pid and pgid as unsigned ints, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2550,16 +2544,12 @@ asmlinkage long sys32_setpgid(u32 pid, u } -extern asmlinkage long sys_setpriority(int which, int who, int niceval); - long sys32_setpriority(u32 which, u32 who, u32 niceval) { /* sign extend which, who and niceval */ return sys_setpriority((int)which, (int)who, (int)niceval); } -extern asmlinkage long sys_ssetmask(int newmask); - /* Note: it is necessary to treat newmask as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2570,8 +2560,6 @@ asmlinkage long sys32_ssetmask(u32 newma return sys_ssetmask((int) newmask); } -extern asmlinkage long sys_syslog(int type, char * buf, int len); - long sys32_syslog(u32 type, char * buf, u32 len) { /* sign extend len */ @@ -2579,8 +2567,6 @@ long sys32_syslog(u32 type, char * buf, } -extern asmlinkage long sys_umask(int mask); - /* Note: it is necessary to treat mask as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2592,8 +2578,6 @@ asmlinkage long sys32_umask(u32 mask) } -extern asmlinkage long sys_umount(char * name, int flags); - /* Note: it is necessary to treat flags as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -2696,10 +2680,6 @@ int sys32_olduname(struct oldold_utsname return error; } -extern unsigned long sys_mmap(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, off_t offset); - unsigned long sys32_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) @@ -2741,8 +2721,6 @@ long sys32_utimes(char *filename, struct return ret; } -extern long sys_tgkill(int tgid, int pid, int sig); - long sys32_tgkill(u32 tgid, u32 pid, int sig) { /* sign extend tgid, pid */ @@ -2753,11 +2731,6 @@ long sys32_tgkill(u32 tgid, u32 pid, int * long long munging: * The 32 bit ABI passes long longs in an odd even register pair. */ -extern ssize_t sys_pread64(unsigned int fd, char *buf, size_t count, - loff_t pos); - -extern ssize_t sys_pwrite64(unsigned int fd, const char *buf, size_t count, - loff_t pos); compat_ssize_t sys32_pread64(unsigned int fd, char *ubuf, compat_size_t count, u32 reg6, u32 poshi, u32 poslo) @@ -2771,16 +2744,11 @@ compat_ssize_t sys32_pwrite64(unsigned i return sys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); } -extern ssize_t sys_readahead(int fd, loff_t offset, size_t count); - compat_ssize_t sys32_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count) { return sys_readahead(fd, ((loff_t)offhi << 32) | offlo, count); } -extern asmlinkage long sys_truncate(const char * path, unsigned long length); -extern asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); - asmlinkage int sys32_truncate64(const char * path, u32 reg4, unsigned long high, unsigned long low) { @@ -2793,8 +2761,6 @@ asmlinkage int sys32_ftruncate64(unsigne return sys_ftruncate(fd, (high << 32) | low); } -extern long sys_lookup_dcookie(u64 cookie64, char *buf, size_t len); - long ppc32_lookup_dcookie(u32 cookie_high, u32 cookie_low, char *buf, size_t len) { --- linux-2.6.3-rc2/arch/ppc64/kernel/udbg.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/ppc64/kernel/udbg.c 2004-02-12 00:39:25.000000000 -0800 @@ -22,6 +22,9 @@ #include #include #include +#include +#include +#include struct NS16550 { /* this struct must be packed */ @@ -69,6 +72,61 @@ udbg_init_uart(void *comport) } } +#ifdef CONFIG_PPC_PMAC + +#define SCC_TXRDY 4 +#define SCC_RXRDY 1 + +static volatile u8 *sccc, *sccd; + +static unsigned char scc_inittab[] = { + 13, 0, /* set baud rate divisor */ + 12, 0, + 14, 1, /* baud rate gen enable, src=rtxc */ + 11, 0x50, /* clocks = br gen */ + 5, 0xea, /* tx 8 bits, assert DTR & RTS */ + 4, 0x46, /* x16 clock, 1 stop */ + 3, 0xc1, /* rx enable, 8 bits */ +}; + +void +udbg_init_scc(struct device_node *np) +{ + unsigned long addr; + int i, x; + + if (np == NULL) + np = of_find_node_by_name(NULL, "escc"); + if (np == NULL) + return; + + /* Lock-enable the SCC channel */ + pmac_call_feature(PMAC_FTR_SCC_ENABLE, np, PMAC_SCC_ASYNC | PMAC_SCC_FLAG_XMON, 1); + + /* Setup for 57600 8N1 */ + addr = np->addrs[0].address + 0x20; + sccc = (volatile u8 *) ioremap(addr & PAGE_MASK, PAGE_SIZE) ; + sccc += addr & ~PAGE_MASK; + sccd = sccc + 0x10; + + for (i = 20000; i != 0; --i) + x = *sccc; eieio(); + *sccc = 9; eieio(); /* reset A or B side */ + *sccc = 0xc0; eieio(); + for (i = 0; i < sizeof(scc_inittab); ++i) { + *sccc = scc_inittab[i]; + eieio(); + } + + ppc_md.udbg_putc = udbg_putc; + ppc_md.udbg_getc = udbg_getc; + ppc_md.udbg_getc_poll = udbg_getc_poll; + + udbg_puts("Hello World !\n"); +} + +#endif /* CONFIG_PPC_PMAC */ + void udbg_putc(unsigned char c) { @@ -83,6 +141,16 @@ udbg_putc(unsigned char c) udbg_comport->thr = '\r'; eieio(); } } +#ifdef CONFIG_PPC_PMAC + else if (sccc) { + while ((*sccc & SCC_TXRDY) == 0) + eieio(); + *sccd = c; + eieio(); + if (c == '\n') + udbg_putc('\r'); + } +#endif /* CONFIG_PPC_PMAC */ } int udbg_getc_poll(void) @@ -93,6 +161,15 @@ int udbg_getc_poll(void) else return -1; } +#ifdef CONFIG_PPC_PMAC + else if (sccc) { + eieio(); + if ((*sccc & SCC_RXRDY) != 0) + return *sccd; + else + return -1; + } +#endif /* CONFIG_PPC_PMAC */ return -1; } @@ -104,6 +181,14 @@ udbg_getc(void) /* wait for char */; return udbg_comport->rbr; } +#ifdef CONFIG_PPC_PMAC + else if (sccc) { + eieio(); + while ((*sccc & SCC_RXRDY) == 0) + eieio(); + return *sccd; + } +#endif /* CONFIG_PPC_PMAC */ return 0; } @@ -149,6 +234,8 @@ udbg_read(char *buf, int buflen) { do { c = ppc_md.udbg_getc(); } while (c == 0x11 || c == 0x13); + if (c == 0) + break; *p++ = c; } return i; --- linux-2.6.3-rc2/arch/ppc64/mm/hash_low.S 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/ppc64/mm/hash_low.S 2004-02-12 00:39:25.000000000 -0800 @@ -95,7 +95,7 @@ _GLOBAL(__hash_page) /* Write the linux PTE atomically (setting busy) */ stdcx. r30,0,r6 bne- 1b - + isync /* Step 2: * --- linux-2.6.3-rc2/arch/ppc64/mm/hash_utils.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/ppc64/mm/hash_utils.c 2004-02-12 00:39:25.000000000 -0800 @@ -140,7 +140,8 @@ void __init htab_initialize(void) htab_data.htab_num_ptegs = pteg_count; htab_data.htab_hash_mask = pteg_count - 1; - if (systemcfg->platform == PLATFORM_PSERIES) { + if (systemcfg->platform == PLATFORM_PSERIES || + systemcfg->platform == PLATFORM_POWERMAC) { /* Find storage for the HPT. Must be contiguous in * the absolute address space. */ --- linux-2.6.3-rc2/arch/ppc64/xmon/xmon.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc64/xmon/xmon.c 2004-02-12 00:39:25.000000000 -0800 @@ -847,7 +847,8 @@ bpt_cmds(void) break; } - if (!(systemcfg->platform & PLATFORM_PSERIES)) { + if (systemcfg->platform != PLATFORM_POWERMAC && + !(systemcfg->platform & PLATFORM_PSERIES)) { printf("Not supported for this platform\n"); break; } --- linux-2.6.3-rc2/arch/ppc/8260_io/enet.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/ppc/8260_io/enet.c 2004-02-12 00:39:30.000000000 -0800 @@ -851,7 +851,7 @@ static int __init scc_enet_init(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.3-rc2/arch/ppc/8260_io/fcc_enet.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/ppc/8260_io/fcc_enet.c 2004-02-12 00:39:30.000000000 -0800 @@ -1371,7 +1371,7 @@ static int __init fec_enet_init(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.3-rc2/arch/ppc/8xx_io/enet.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/ppc/8xx_io/enet.c 2004-02-12 00:39:30.000000000 -0800 @@ -949,7 +949,7 @@ static int __init scc_enet_init(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.3-rc2/arch/ppc/8xx_io/fec.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/ppc/8xx_io/fec.c 2004-02-12 00:39:30.000000000 -0800 @@ -1748,7 +1748,7 @@ static int __init fec_enet_init(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/ppc/boot/common/bootinfo.c 2004-02-12 00:39:51.000000000 -0800 @@ -0,0 +1,70 @@ +/* + * arch/ppc/common/bootinfo.c + * + * General bootinfo record utilities + * Author: Randy Vinson + * + * 2002 (c) MontaVista Software, Inc. This file is licensed under the terms + * of the GNU General Public License version 2. This program is licensed + * "as is" without any warranty of any kind, whether express or implied. + */ + +#include +#include +#include + +#include "nonstdio.h" + +static struct bi_record * birec = NULL; + +static struct bi_record * +__bootinfo_build(struct bi_record *rec, unsigned long tag, unsigned long size, + void *data) +{ + /* set the tag */ + rec->tag = tag; + + /* if the caller has any data, copy it */ + if (size) + memcpy(rec->data, (char *)data, size); + + /* set the record size */ + rec->size = sizeof(struct bi_record) + size; + + /* advance to the next available space */ + rec = (struct bi_record *)((unsigned long)rec + rec->size); + + return rec; +} + +void +bootinfo_init(struct bi_record *rec) +{ + + /* save start of birec area */ + birec = rec; + + /* create an empty list */ + rec = __bootinfo_build(rec, BI_FIRST, 0, NULL); + (void) __bootinfo_build(rec, BI_LAST, 0, NULL); + +} + +void +bootinfo_append(unsigned long tag, unsigned long size, void * data) +{ + + struct bi_record *rec = birec; + + /* paranoia */ + if ((rec == NULL) || (rec->tag != BI_FIRST)) + return; + + /* find the last entry in the list */ + while (rec->tag != BI_LAST) + rec = (struct bi_record *)((ulong)rec + rec->size); + + /* overlay BI_LAST record with new one and tag on a new BI_LAST */ + rec = __bootinfo_build(rec, tag, size, data); + (void) __bootinfo_build(rec, BI_LAST, 0, NULL); +} --- linux-2.6.3-rc2/arch/ppc/boot/ld.script 2003-11-09 16:45:05.000000000 -0800 +++ 25/arch/ppc/boot/ld.script 2004-02-12 00:41:25.000000000 -0800 @@ -82,6 +82,7 @@ SECTIONS *(__ksymtab) *(__ksymtab_strings) *(__bug_table) + *(__kcrctab) } } --- linux-2.6.3-rc2/arch/ppc/boot/simple/Makefile 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/ppc/boot/simple/Makefile 2004-02-12 00:39:51.000000000 -0800 @@ -30,7 +30,7 @@ tftpboot := /tftpboot # Normally, we use the 'misc.c' file for decompress_kernel and # whatnot. Sometimes we need to override this however. -misc-y := misc.o +misc-y := misc.o ../common/bootinfo.o # Normally, we have our images end in .elf, but something we want to # change this. @@ -103,7 +103,7 @@ zimageinitrd-$(pcore) := zImage.initrd zimageinitrd-$(CONFIG_SPRUCE) := zImage.initrd-TREE end-$(CONFIG_SPRUCE) := spruce entrypoint-$(CONFIG_SPRUCE) := 0x00800000 - misc-$(CONFIG_SPRUCE) := misc-spruce.o + misc-$(CONFIG_SPRUCE) += misc-spruce.o # SMP images should have a '.smp' suffix. end-$(CONFIG_SMP) += .smp --- linux-2.6.3-rc2/arch/ppc/boot/simple/misc.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/ppc/boot/simple/misc.c 2004-02-12 00:39:51.000000000 -0800 @@ -1,5 +1,5 @@ /* - * arch/ppc/common/misc-simple.c + * arch/ppc/simple/misc.c * * Misc. bootloader code for many machines. This assumes you have are using * a 6xx/7xx/74xx CPU in your machine. This assumes the chunk of memory @@ -46,6 +46,15 @@ #define HAS_KEYB 0 #endif +/* Will / Can the user give input? + * Val Henson has requested that Gemini doesn't wait for the + * user to edit the cmdline or not. + */ +#if (defined(CONFIG_SERIAL_8250_CONSOLE) || defined(CONFIG_VGA_CONSOLE)) \ + && !defined(CONFIG_GEMINI) +#define INTERACTIVE_CONSOLE 1 +#endif + char *avail_ram; char *end_avail; char *zimage_start; @@ -66,19 +75,28 @@ extern char _end[]; extern unsigned long start; extern int CRT_tstc(void); -extern unsigned long get_mem_size(void); extern unsigned long serial_init(int chan, void *ignored); extern void serial_close(unsigned long com_port); extern void gunzip(void *, int, unsigned char *, int *); extern void serial_fixups(void); +/* Allow get_mem_size to be hooked into. This is the default. */ +unsigned long __attribute__ ((weak)) +get_mem_size(void) +{ + return 0; +} + struct bi_record * decompress_kernel(unsigned long load_addr, int num_words, unsigned long cksum) { +#ifdef INTERACTIVE_CONSOLE int timer = 0; - char *cp, ch; + char ch; +#endif + char *cp; struct bi_record *rec; - unsigned long TotalMemory = 0, rec_loc, initrd_loc; + unsigned long initrd_loc, TotalMemory = 0; serial_fixups(); com_port = serial_init(0, NULL); @@ -93,13 +111,11 @@ decompress_kernel(unsigned long load_add __asm__ __volatile__("eieio"); #endif -#if defined(CONFIG_LOPEC) || defined(CONFIG_PAL4) /* * Call get_mem_size(), which is memory controller dependent, * and we must have the correct file linked in here. */ TotalMemory = get_mem_size(); -#endif /* assume the chunk below 8M is free */ end_avail = (char *)0x00800000; @@ -170,9 +186,11 @@ decompress_kernel(unsigned long load_add memcpy (cmd_line, cmd_preset, sizeof(cmd_preset)); while ( *cp ) putc(*cp++); -#ifndef CONFIG_GEMINI - /* Val Henson has requested that Gemini doesn't wait for the - * user to edit the cmdline or not. */ +#ifdef INTERACTIVE_CONSOLE + /* + * If they have a console, allow them to edit the command line. + * Otherwise, don't bother wasting the five seconds. + */ while (timer++ < 5*1000) { if (tstc()) { while ((ch = getc()) != '\n' && ch != '\r') { @@ -205,16 +223,13 @@ decompress_kernel(unsigned long load_add gunzip(0, 0x400000, zimage_start, &zimage_size); puts("done.\n"); - /* - * Create bi_recs for cmd_line and initrds - */ - rec_loc = _ALIGN((unsigned long)(zimage_size) + - (1 << 20) - 1, (1 << 20)); - rec = (struct bi_record *)rec_loc; + /* get the bi_rec address */ + rec = bootinfo_addr(zimage_size); /* We need to make sure that the initrd and bi_recs do not * overlap. */ if ( initrd_size ) { + unsigned long rec_loc = (unsigned long) rec; initrd_loc = (unsigned long)(&__ramdisk_begin); /* If the bi_recs are in the middle of the current * initrd, move the initrd to the next MB @@ -231,39 +246,25 @@ decompress_kernel(unsigned long load_add } } - rec->tag = BI_FIRST; - rec->size = sizeof(struct bi_record); - rec = (struct bi_record *)((unsigned long)rec + rec->size); - - if ( TotalMemory ) { - rec->tag = BI_MEMSIZE; - rec->data[0] = TotalMemory; - rec->size = sizeof(struct bi_record) + sizeof(unsigned long); - rec = (struct bi_record *)((unsigned long)rec + rec->size); - } + bootinfo_init(rec); + if ( TotalMemory ) + bootinfo_append(BI_MEMSIZE, sizeof(int), (void*)&TotalMemory); - rec->tag = BI_CMD_LINE; - memcpy( (char *)rec->data, cmd_line, strlen(cmd_line)+1); - rec->size = sizeof(struct bi_record) + strlen(cmd_line) + 1; - rec = (struct bi_record *)((unsigned long)rec + rec->size); + bootinfo_append(BI_CMD_LINE, strlen(cmd_line)+1, (void*)cmd_line); - if ( initrd_size ) { - rec->tag = BI_INITRD; - rec->data[0] = initrd_loc; - rec->data[1] = initrd_size; - rec->size = sizeof(struct bi_record) + 2 * - sizeof(unsigned long); - rec = (struct bi_record *)((unsigned long)rec + - rec->size); - } + /* add a bi_rec for the initrd if it exists */ + if (initrd_size) { + unsigned long initrd[2]; - rec->tag = BI_LAST; - rec->size = sizeof(struct bi_record); - rec = (struct bi_record *)((unsigned long)rec + rec->size); + initrd[0] = initrd_loc; + initrd[1] = initrd_size; + + bootinfo_append(BI_INITRD, sizeof(initrd), &initrd); + } puts("Now booting the kernel\n"); serial_close(com_port); - return (struct bi_record *)rec_loc; + return rec; } /* Allow decompress_kernel to be hooked into. This is the default. */ --- linux-2.6.3-rc2/arch/ppc/boot/simple/misc-spruce.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/ppc/boot/simple/misc-spruce.c 2004-02-12 00:39:51.000000000 -0800 @@ -15,55 +15,19 @@ */ #include -#include #include #include -#include -#include #include -#include "zlib.h" +extern unsigned long decompress_kernel(unsigned long load_addr, int num_words, + unsigned long cksum); /* Define some important locations of the Spruce. */ #define SPRUCE_PCI_CONFIG_ADDR 0xfec00000 #define SPRUCE_PCI_CONFIG_DATA 0xfec00004 #define SPRUCE_ISA_IO_BASE 0xf8000000 -unsigned long com_port; - -char *avail_ram; -char *end_avail; - -/* The linker tells us where the image is. */ -extern char __image_begin, __image_end; -extern char __ramdisk_begin, __ramdisk_end; -extern char _end[]; - -#ifdef CONFIG_CMDLINE -#define CMDLINE CONFIG_CMDLINE -#else -#define CMDLINE "" -#endif -char cmd_preset[] = CMDLINE; -char cmd_buf[256]; -char *cmd_line = cmd_buf; - -unsigned long initrd_size = 0; - -char *zimage_start; -int zimage_size; - -extern void udelay(long); -extern void puts(const char *); -extern void putc(const char c); -extern void puthex(unsigned long val); -extern int getc(void); -extern int tstc(void); -extern void gunzip(void *, int, unsigned char *, int *); - -extern unsigned long serial_init(int chan, void *ignored); - /* PCI configuration space access routines. */ unsigned int *pci_config_address = (unsigned int *)SPRUCE_PCI_CONFIG_ADDR; unsigned char *pci_config_data = (unsigned char *)SPRUCE_PCI_CONFIG_DATA; @@ -146,45 +110,16 @@ unsigned long isa_io_base = SPRUCE_ISA_I #define MEM_B2EA 0x60 unsigned long -load_kernel(unsigned long load_addr, int num_words, unsigned long cksum) +get_mem_size(void) { - int timer = 0; - char *cp, ch; - int loop; - int csr0; - int csr_id; - int *mem_addr = (int *)0xff500008; - int *mem_data = (int *)0xff50000c; - int mem_size = 0; + unsigned long mem_size = 0; unsigned long mem_mben; unsigned long mem_type; unsigned long mem_start; unsigned long mem_end; - int *pif_addr = (int *)0xff500000; - int *pif_data = (int *)0xff500004; - int pci_devfn; - int found_multi = 0; - unsigned short vendor; - unsigned short device; - unsigned short command; - unsigned char header_type; - unsigned int bar0; - -#ifdef CONFIG_SERIAL_8250_CONSOLE - /* Initialize the serial console port */ - com_port = serial_init(0, NULL); -#endif - - /* - * Gah, these firmware guys need to learn that hardware - * byte swapping is evil! Disable all hardware byte - * swapping so it doesn't hurt anyone. - */ - *pif_addr = PLBMIFOPT; - asm("sync"); - *pif_data = 0x00000000; - asm("sync"); + volatile int *mem_addr = (int *)0xff500008; + volatile int *mem_data = (int *)0xff50000c; /* Get the size of memory from the memory controller. */ *mem_addr = MEM_MBEN; @@ -235,6 +170,33 @@ load_kernel(unsigned long load_addr, int mem_size += mem_end - mem_start + 0x100000; } + return mem_size; +} + +unsigned long +load_kernel(unsigned long load_addr, int num_words, unsigned long cksum) +{ + int csr0; + int csr_id; + int pci_devfn; + int found_multi = 0; + unsigned short vendor; + unsigned short device; + unsigned short command; + unsigned char header_type; + unsigned int bar0; + volatile int *pif_addr = (int *)0xff500000; + volatile int *pif_data = (int *)0xff500004; + + /* + * Gah, these firmware guys need to learn that hardware + * byte swapping is evil! Disable all hardware byte + * swapping so it doesn't hurt anyone. + */ + *pif_addr = PLBMIFOPT; + asm("sync"); + *pif_data = 0x00000000; + asm("sync"); /* Search out and turn off the PcNet ethernet boot device. */ for (pci_devfn = 1; pci_devfn < 0xff; pci_devfn++) { @@ -310,134 +272,5 @@ load_kernel(unsigned long load_addr, int } } - /* assume the chunk below 8M is free */ - end_avail = (char *)0x00800000; - - /* - * We link ourself to 0x00800000. When we run, we relocate - * ourselves there. So we just need __image_begin for the - * start. -- Tom - */ - zimage_start = (char *)(unsigned long)(&__image_begin); - zimage_size = (unsigned long)(&__image_end) - - (unsigned long)(&__image_begin); - - initrd_size = (unsigned long)(&__ramdisk_end) - - (unsigned long)(&__ramdisk_begin); - - /* - * The zImage and initrd will be between start and _end, so they've - * already been moved once. We're good to go now. -- Tom - */ - avail_ram = (char *)PAGE_ALIGN((unsigned long)_end); - puts("zimage at: "); puthex((unsigned long)zimage_start); - puts(" "); puthex((unsigned long)(zimage_size+zimage_start)); - puts("\n"); - - if ( initrd_size ) { - puts("initrd at: "); - puthex((unsigned long)(&__ramdisk_begin)); - puts(" "); puthex((unsigned long)(&__ramdisk_end));puts("\n"); - } - - avail_ram = (char *)0x00400000; - end_avail = (char *)0x00800000; - puts("avail ram: "); puthex((unsigned long)avail_ram); puts(" "); - puthex((unsigned long)end_avail); puts("\n"); - - /* Display standard Linux/PPC boot prompt for kernel args */ - puts("\nLinux/PPC load: "); - cp = cmd_line; - memcpy (cmd_line, cmd_preset, sizeof(cmd_preset)); - while ( *cp ) putc(*cp++); - while (timer++ < 5*1000) { - if (tstc()) { - while ((ch = getc()) != '\n' && ch != '\r') { - if (ch == '\b') { - if (cp != cmd_line) { - cp--; - puts("\b \b"); - } - } else { - *cp++ = ch; - putc(ch); - } - } - break; /* Exit 'timer' loop */ - } - udelay(1000); /* 1 msec */ - } - *cp = 0; - puts("\n"); - - puts("Uncompressing Linux..."); - - gunzip(0, 0x400000, zimage_start, &zimage_size); - - puts("done.\n"); - - { - struct bi_record *rec; - unsigned long initrd_loc; - unsigned long rec_loc = _ALIGN((unsigned long)(zimage_size) + - (1 << 20) - 1, (1 << 20)); - rec = (struct bi_record *)rec_loc; - - /* We need to make sure that the initrd and bi_recs do not - * overlap. */ - if ( initrd_size ) { - initrd_loc = (unsigned long)(&__ramdisk_begin); - /* If the bi_recs are in the middle of the current - * initrd, move the initrd to the next MB - * boundary. */ - if ((rec_loc > initrd_loc) && - ((initrd_loc + initrd_size) - > rec_loc)) { - initrd_loc = _ALIGN((unsigned long)(zimage_size) - + (2 << 20) - 1, (2 << 20)); - memmove((void *)initrd_loc, &__ramdisk_begin, - initrd_size); - puts("initrd moved: "); puthex(initrd_loc); - puts(" "); puthex(initrd_loc + initrd_size); - puts("\n"); - } - } - - rec->tag = BI_FIRST; - rec->size = sizeof(struct bi_record); - rec = (struct bi_record *)((unsigned long)rec + rec->size); - - rec->tag = BI_BOOTLOADER_ID; - memcpy( (void *)rec->data, "spruceboot", 11); - rec->size = sizeof(struct bi_record) + 10 + 1; - rec = (struct bi_record *)((unsigned long)rec + rec->size); - - rec->tag = BI_MEMSIZE; - rec->data[0] = mem_size; - rec->size = sizeof(struct bi_record) + sizeof(unsigned long); - rec = (struct bi_record *)((unsigned long)rec + rec->size); - - rec->tag = BI_CMD_LINE; - memcpy( (char *)rec->data, cmd_line, strlen(cmd_line)+1); - rec->size = sizeof(struct bi_record) + strlen(cmd_line) + 1; - rec = (struct bi_record *)((ulong)rec + rec->size); - - if ( initrd_size ) { - rec->tag = BI_INITRD; - rec->data[0] = initrd_loc; - rec->data[1] = initrd_size; - rec->size = sizeof(struct bi_record) + 2 * - sizeof(unsigned long); - rec = (struct bi_record *)((unsigned long)rec + - rec->size); - } - - rec->tag = BI_LAST; - rec->size = sizeof(struct bi_record); - rec = (struct bi_record *)((unsigned long)rec + rec->size); - } - - puts("Now booting the kernel\n"); - - return 0; + return decompress_kernel(load_addr, num_words, cksum); } --- linux-2.6.3-rc2/arch/ppc/Kconfig 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc/Kconfig 2004-02-12 00:39:51.000000000 -0800 @@ -592,6 +592,11 @@ config PPC_PMAC depends on PPC_MULTIPLATFORM default y +config PPC_PMAC64 + bool + depends on PPC_PMAC && POWER4 + default y + config PPC_PREP bool depends on PPC_MULTIPLATFORM @@ -604,7 +609,7 @@ config PPC_OF config PPC_GEN550 bool - depends on SANDPOINT || MCPN765 + depends on SANDPOINT || MCPN765 || SPRUCE default y config FORCE --- linux-2.6.3-rc2/arch/ppc/kernel/entry.S 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/ppc/kernel/entry.S 2004-02-12 00:39:52.000000000 -0800 @@ -689,8 +689,8 @@ ret_from_crit_exc: mtspr SPRN_ESR,r10 lwz r11,_NIP(r1) lwz r12,_MSR(r1) - mtspr SRR2,r11 - mtspr SRR3,r12 + mtspr CSRR0,r11 + mtspr CSRR1,r12 lwz r9,GPR9(r1) lwz r12,GPR12(r1) lwz r10,crit_sprg0@l(0) --- linux-2.6.3-rc2/arch/ppc/kernel/irq.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/ppc/kernel/irq.c 2004-02-12 00:41:09.000000000 -0800 @@ -575,7 +575,7 @@ cpumask_t irq_affinity [NR_IRQS]; static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -616,7 +616,7 @@ static int irq_affinity_write_proc (stru static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3-rc2/arch/ppc/kernel/pci.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc/kernel/pci.c 2004-02-12 00:39:25.000000000 -0800 @@ -1022,8 +1022,31 @@ pci_create_OF_bus_map(void) prom_add_property(find_path_device("/"), of_prop); } } + +static ssize_t pci_show_devspec(struct device *dev, char *buf) +{ + struct pci_dev *pdev; + struct device_node *np; + + pdev = to_pci_dev (dev); + np = pci_device_to_OF_node(pdev); + if (np == NULL || np->full_name == NULL) + return 0; + return sprintf(buf, "%s", np->full_name); +} +static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); + #endif /* CONFIG_PPC_OF */ +/* Add sysfs properties */ +void pcibios_add_platform_entries(struct pci_dev *dev) +{ +#ifdef CONFIG_PPC_OF + device_create_file(&pdev->dev, &dev_attr_devspec); +#endif /* CONFIG_PPC_OF */ +} + + #ifdef CONFIG_PPC_PMAC /* * This set of routines checks for PCI<->PCI bridges that have closed --- linux-2.6.3-rc2/arch/ppc/kernel/syscalls.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ppc/kernel/syscalls.c 2004-02-12 00:40:55.000000000 -0800 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -200,8 +201,6 @@ out: return err; } -extern int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); - /* * Due to some executables calling the wrong select we sometimes * get wrong args. This determines how the args are being passed --- linux-2.6.3-rc2/arch/ppc/platforms/Makefile 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc/platforms/Makefile 2004-02-12 00:39:52.000000000 -0800 @@ -20,7 +20,7 @@ obj-$(CONFIG_PPC_PMAC) += pmac_pic.o pm pmac_feature.o pmac_pci.o pmac_sleep.o \ pmac_low_i2c.o obj-$(CONFIG_PPC_CHRP) += chrp_setup.o chrp_time.o chrp_pci.o -obj-$(CONFIG_PPC_PREP) += prep_pci.o prep_time.o prep_setup.o +obj-$(CONFIG_PPC_PREP) += prep_pci.o prep_setup.o ifeq ($(CONFIG_PPC_PMAC),y) obj-$(CONFIG_NVRAM) += pmac_nvram.o obj-$(CONFIG_CPU_FREQ_PMAC) += pmac_cpufreq.o --- linux-2.6.3-rc2/arch/ppc/platforms/pmac_feature.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc/platforms/pmac_feature.c 2004-02-12 00:39:25.000000000 -0800 @@ -2239,7 +2239,7 @@ probe_motherboard(void) break; #else /* CONFIG_POWER4 */ case macio_keylargo2: - pmac_mb.model_id = PMAC_TYPE_POWERMAC_G5; + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2; pmac_mb.model_name = "Unknown G5"; pmac_mb.features = g5_features; break; --- linux-2.6.3-rc2/arch/ppc/platforms/prep_setup.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/arch/ppc/platforms/prep_setup.c 2004-02-12 00:39:52.000000000 -0800 @@ -58,16 +58,14 @@ #include #include #include +#include + +TODC_ALLOC(); unsigned char ucSystemType; unsigned char ucBoardRev; unsigned char ucBoardRevMaj, ucBoardRevMin; -extern unsigned long mc146818_get_rtc_time(void); -extern int mc146818_set_rtc_time(unsigned long nowtime); -extern unsigned long mk48t59_get_rtc_time(void); -extern int mk48t59_set_rtc_time(unsigned long nowtime); - extern unsigned char prep_nvram_read_val(int addr); extern void prep_nvram_write_val(int addr, unsigned char val); @@ -814,12 +812,12 @@ prep_setup_arch(void) } /* - * Determine the decrementer frequency from the residual data - * This allows for a faster boot as we do not need to calibrate the - * decrementer against another clock. This is important for embedded systems. + * First, see if we can get this information from the residual data. + * This is important on some IBM PReP systems. If we cannot, we let the + * TODC code handle doing this. */ -static int __init -prep_res_calibrate_decr(void) +static void __init +prep_calibrate_decr(void) { #ifdef CONFIG_PREP_RESIDUAL unsigned long freq, divisor = 4; @@ -831,149 +829,9 @@ prep_res_calibrate_decr(void) (freq/divisor)%1000000); tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); tb_ticks_per_jiffy = freq / HZ / divisor; - return 0; } else #endif - return 1; -} - -/* - * Uses the on-board timer to calibrate the on-chip decrementer register - * for prep systems. On the pmac the OF tells us what the frequency is - * but on prep we have to figure it out. - * -- Cort - */ -/* Done with 3 interrupts: the first one primes the cache and the - * 2 following ones measure the interval. The precision of the method - * is still doubtful due to the short interval sampled. - */ -static volatile int calibrate_steps __initdata = 3; -static unsigned tbstamp __initdata = 0; - -static irqreturn_t __init -prep_calibrate_decr_handler(int irq, void *dev, struct pt_regs *regs) -{ - unsigned long t, freq; - int step=--calibrate_steps; - - t = get_tbl(); - if (step > 0) { - tbstamp = t; - } else { - freq = (t - tbstamp)*HZ; - printk("time_init: decrementer frequency = %lu.%.6lu MHz\n", - freq/1000000, freq%1000000); - tb_ticks_per_jiffy = freq / HZ; - tb_to_us = mulhwu_scale_factor(freq, 1000000); - } - return IRQ_HANDLED; -} - -static void __init -prep_calibrate_decr(void) -{ - int res; - - /* Try and get this from the residual data. */ - res = prep_res_calibrate_decr(); - - /* If we didn't get it from the residual data, try this. */ - if ( res ) { -#define TIMER0_COUNT 0x40 -#define TIMER_CONTROL 0x43 - /* set timer to periodic mode */ - outb_p(0x34,TIMER_CONTROL);/* binary, mode 2, LSB/MSB, ch 0 */ - /* set the clock to ~100 Hz */ - outb_p(LATCH & 0xff , TIMER0_COUNT); /* LSB */ - outb(LATCH >> 8 , TIMER0_COUNT); /* MSB */ - - if (request_irq(0, prep_calibrate_decr_handler, 0, "timer", NULL) != 0) - panic("Could not allocate timer IRQ!"); - local_irq_enable(); - /* wait for calibrate */ - while ( calibrate_steps ) - ; - local_irq_disable(); - free_irq( 0, NULL); - } -} - -static long __init -mk48t59_init(void) { - unsigned char tmp; - - tmp = ppc_md.nvram_read_val(MK48T59_RTC_CONTROLB); - if (tmp & MK48T59_RTC_CB_STOP) { - printk("Warning: RTC was stopped, date will be wrong.\n"); - ppc_md.nvram_write_val(MK48T59_RTC_CONTROLB, - tmp & ~MK48T59_RTC_CB_STOP); - /* Low frequency crystal oscillators may take a very long - * time to startup and stabilize. For now just ignore the - * the issue, but attempting to calibrate the decrementer - * from the RTC just after this wakeup is likely to be very - * inaccurate. Firmware should not allow to load - * the OS with the clock stopped anyway... - */ - } - /* Ensure that the clock registers are updated */ - tmp = ppc_md.nvram_read_val(MK48T59_RTC_CONTROLA); - tmp &= ~(MK48T59_RTC_CA_READ | MK48T59_RTC_CA_WRITE); - ppc_md.nvram_write_val(MK48T59_RTC_CONTROLA, tmp); - return 0; -} - -/* We use the NVRAM RTC to time a second to calibrate the decrementer, - * the RTC registers have just been set up in the right state by the - * preceding routine. - */ -static void __init -mk48t59_calibrate_decr(void) -{ - unsigned long freq; - unsigned long t1; - unsigned char save_control; - long i; - unsigned char sec; - - - /* Make sure the time is not stopped. */ - save_control = ppc_md.nvram_read_val(MK48T59_RTC_CONTROLB); - - ppc_md.nvram_write_val(MK48T59_RTC_CONTROLA, - (save_control & (~MK48T59_RTC_CB_STOP))); - - /* Now make sure the read bit is off so the value will change. */ - save_control = ppc_md.nvram_read_val(MK48T59_RTC_CONTROLA); - save_control &= ~MK48T59_RTC_CA_READ; - ppc_md.nvram_write_val(MK48T59_RTC_CONTROLA, save_control); - - - /* Read the seconds value to see when it changes. */ - sec = ppc_md.nvram_read_val(MK48T59_RTC_SECONDS); - /* Actually this is bad for precision, we should have a loop in - * which we only read the seconds counter. nvram_read_val writes - * the address bytes on every call and this takes a lot of time. - * Perhaps an nvram_wait_change method returning a time - * stamp with a loop count as parameter would be the solution. - */ - for (i = 0 ; i < 1000000 ; i++) { /* may take up to 1 second... */ - t1 = get_tbl(); - if (ppc_md.nvram_read_val(MK48T59_RTC_SECONDS) != sec) { - break; - } - } - - sec = ppc_md.nvram_read_val(MK48T59_RTC_SECONDS); - for (i = 0 ; i < 1000000 ; i++) { /* Should take up 1 second... */ - freq = get_tbl()-t1; - if (ppc_md.nvram_read_val(MK48T59_RTC_SECONDS) != sec) - break; - } - - printk("time_init: decrementer frequency = %lu.%.6lu MHz\n", - freq/1000000, freq%1000000); - tb_ticks_per_jiffy = freq / HZ; - tb_to_us = mulhwu_scale_factor(freq, 1000000); + todc_calibrate_decr(); } static unsigned int __prep @@ -1163,18 +1021,19 @@ prep_init(unsigned long r3, unsigned lon ppc_md.nvram_read_val = prep_nvram_read_val; ppc_md.nvram_write_val = prep_nvram_write_val; - ppc_md.time_init = NULL; + ppc_md.time_init = todc_time_init; if (_prep_type == _PREP_IBM) { - ppc_md.set_rtc_time = mc146818_set_rtc_time; - ppc_md.get_rtc_time = mc146818_get_rtc_time; - ppc_md.calibrate_decr = prep_calibrate_decr; + TODC_INIT(TODC_TYPE_MC146818, PREP_NVRAM_AS0, PREP_NVRAM_AS1, + PREP_NVRAM_DATA, 8); } else { - ppc_md.set_rtc_time = mk48t59_set_rtc_time; - ppc_md.get_rtc_time = mk48t59_get_rtc_time; - ppc_md.calibrate_decr = mk48t59_calibrate_decr; - ppc_md.time_init = mk48t59_init; + TODC_INIT(TODC_TYPE_MK48T59, PREP_NVRAM_AS0, PREP_NVRAM_AS1, + PREP_NVRAM_DATA, 8); } + ppc_md.calibrate_decr = prep_calibrate_decr; + ppc_md.set_rtc_time = todc_set_rtc_time; + ppc_md.get_rtc_time = todc_get_rtc_time; + ppc_md.setup_io_mappings = prep_map_io; #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) --- linux-2.6.3-rc2/arch/ppc/platforms/prep_time.c 2003-09-27 18:57:44.000000000 -0700 +++ /dev/null 2002-08-30 16:31:37.000000000 -0700 @@ -1,225 +0,0 @@ -/* - * arch/ppc/platforms/prep_time.c - * - * Copyright (C) 1991, 1992, 1995 Linus Torvalds - * - * Adapted for PowerPC (PReP) by Gary Thomas - * Modified by Cort Dougan (cort@cs.nmt.edu). - * Copied and modified from arch/i386/kernel/time.c - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -extern spinlock_t rtc_lock; - -/* - * The motorola uses the m48t18 rtc (includes DS1643) whose registers - * are at a higher end of nvram (1ff8-1fff) than the ibm mc146818 - * rtc (ds1386) which has regs at addr 0-d). The intel gets - * past this because the bios emulates the mc146818. - * - * Why in the world did they have to use different clocks? - * - * Right now things are hacked to check which machine we're on then - * use the appropriate macro. This is very very ugly and I should - * probably have a function that checks which machine we're on then - * does things correctly transparently or a function pointer which - * is setup at boot time to use the correct addresses. - * -- Cort - */ - -/* - * Set the hardware clock. -- Cort - */ -__prep -int mc146818_set_rtc_time(unsigned long nowtime) -{ - unsigned char save_control, save_freq_select; - struct rtc_time tm; - - spin_lock(&rtc_lock); - to_tm(nowtime, &tm); - - /* tell the clock it's being set */ - save_control = CMOS_READ(RTC_CONTROL); - - CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); - - /* stop and reset prescaler */ - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - - tm.tm_year = (tm.tm_year - 1900) % 100; - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - BIN_TO_BCD(tm.tm_sec); - BIN_TO_BCD(tm.tm_min); - BIN_TO_BCD(tm.tm_hour); - BIN_TO_BCD(tm.tm_mon); - BIN_TO_BCD(tm.tm_mday); - BIN_TO_BCD(tm.tm_year); - } - CMOS_WRITE(tm.tm_sec, RTC_SECONDS); - CMOS_WRITE(tm.tm_min, RTC_MINUTES); - CMOS_WRITE(tm.tm_hour, RTC_HOURS); - CMOS_WRITE(tm.tm_mon, RTC_MONTH); - CMOS_WRITE(tm.tm_mday, RTC_DAY_OF_MONTH); - CMOS_WRITE(tm.tm_year, RTC_YEAR); - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... -- Markus Kuhn - */ - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - spin_unlock(&rtc_lock); - - return 0; -} - -__prep -unsigned long mc146818_get_rtc_time(void) -{ - unsigned int year, mon, day, hour, min, sec; - int uip, i; - - /* The Linux interpretation of the CMOS clock register contents: - * When the Update-In-Progress (UIP) flag goes from 1 to 0, the - * RTC registers show the second which has precisely just started. - * Let's hope other operating systems interpret the RTC the same way. - */ - - /* Since the UIP flag is set for about 2.2 ms and the clock - * is typically written with a precision of 1 jiffy, trying - * to obtain a precision better than a few milliseconds is - * an illusion. Only consistency is interesting, this also - * allows to use the routine for /dev/rtc without a potential - * 1 second kernel busy loop triggered by any reader of /dev/rtc. - */ - - for ( i = 0; i<1000000; i++) { - uip = CMOS_READ(RTC_FREQ_SELECT); - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); - uip |= CMOS_READ(RTC_FREQ_SELECT); - if ((uip & RTC_UIP)==0) break; - } - - if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) - || RTC_ALWAYS_BCD) - { - BCD_TO_BIN(sec); - BCD_TO_BIN(min); - BCD_TO_BIN(hour); - BCD_TO_BIN(day); - BCD_TO_BIN(mon); - BCD_TO_BIN(year); - } - if ((year += 1900) < 1970) - year += 100; - return mktime(year, mon, day, hour, min, sec); -} - -__prep -int mk48t59_set_rtc_time(unsigned long nowtime) -{ - unsigned char save_control; - struct rtc_time tm; - - spin_lock(&rtc_lock); - to_tm(nowtime, &tm); - - /* tell the clock it's being written */ - save_control = ppc_md.nvram_read_val(MK48T59_RTC_CONTROLA); - - ppc_md.nvram_write_val(MK48T59_RTC_CONTROLA, - (save_control | MK48T59_RTC_CA_WRITE)); - - tm.tm_year = (tm.tm_year - 1900) % 100; - BIN_TO_BCD(tm.tm_sec); - BIN_TO_BCD(tm.tm_min); - BIN_TO_BCD(tm.tm_hour); - BIN_TO_BCD(tm.tm_mon); - BIN_TO_BCD(tm.tm_mday); - BIN_TO_BCD(tm.tm_year); - - ppc_md.nvram_write_val(MK48T59_RTC_SECONDS, tm.tm_sec); - ppc_md.nvram_write_val(MK48T59_RTC_MINUTES, tm.tm_min); - ppc_md.nvram_write_val(MK48T59_RTC_HOURS, tm.tm_hour); - ppc_md.nvram_write_val(MK48T59_RTC_MONTH, tm.tm_mon); - ppc_md.nvram_write_val(MK48T59_RTC_DAY_OF_MONTH, tm.tm_mday); - ppc_md.nvram_write_val(MK48T59_RTC_YEAR, tm.tm_year); - - /* Turn off the write bit. */ - ppc_md.nvram_write_val(MK48T59_RTC_CONTROLA, save_control); - spin_unlock(&rtc_lock); - - return 0; -} - -__prep -unsigned long mk48t59_get_rtc_time(void) -{ - unsigned char save_control; - unsigned int year, mon, day, hour, min, sec; - - /* Simple: freeze the clock, read it and allow updates again */ - save_control = ppc_md.nvram_read_val(MK48T59_RTC_CONTROLA); - save_control &= ~MK48T59_RTC_CA_READ; - ppc_md.nvram_write_val(MK48T59_RTC_CONTROLA, save_control); - - /* Set the register to read the value. */ - ppc_md.nvram_write_val(MK48T59_RTC_CONTROLA, - (save_control | MK48T59_RTC_CA_READ)); - - sec = ppc_md.nvram_read_val(MK48T59_RTC_SECONDS); - min = ppc_md.nvram_read_val(MK48T59_RTC_MINUTES); - hour = ppc_md.nvram_read_val(MK48T59_RTC_HOURS); - day = ppc_md.nvram_read_val(MK48T59_RTC_DAY_OF_MONTH); - mon = ppc_md.nvram_read_val(MK48T59_RTC_MONTH); - year = ppc_md.nvram_read_val(MK48T59_RTC_YEAR); - - /* Let the time values change again. */ - ppc_md.nvram_write_val(MK48T59_RTC_CONTROLA, save_control); - - BCD_TO_BIN(sec); - BCD_TO_BIN(min); - BCD_TO_BIN(hour); - BCD_TO_BIN(day); - BCD_TO_BIN(mon); - BCD_TO_BIN(year); - - year = year + 1900; - if (year < 1970) { - year += 100; - } - - return mktime(year, mon, day, hour, min, sec); -} --- linux-2.6.3-rc2/arch/ppc/platforms/spruce.h 2003-09-27 18:57:44.000000000 -0700 +++ 25/arch/ppc/platforms/spruce.h 2004-02-12 00:39:51.000000000 -0800 @@ -35,11 +35,37 @@ #define SPRUCE_MEM_SIZE 0x04000000 #define SPRUCE_BUS_SPEED 66666667 -#define SPRUCE_SERIAL_1_ADDR 0xff600300 -#define SPRUCE_SERIAL_2_ADDR 0xff600400 - #define SPRUCE_NVRAM_BASE_ADDR 0xff800000 #define SPRUCE_RTC_BASE_ADDR SPRUCE_NVRAM_BASE_ADDR +/* + * Serial port defines + */ +#define SPRUCE_FPGA_REG_A 0xff820000 +#define SPRUCE_UARTCLK_33M 0x02 +#define SPRUCE_UARTCLK_IS_33M(reg) (reg & SPRUCE_UARTCLK_33M) + +#define UART0_IO_BASE 0xff600300 +#define UART1_IO_BASE 0xff600400 + +#define RS_TABLE_SIZE 2 + +#define SPRUCE_BAUD_33M (33000000/64) +#define SPRUCE_BAUD_30M (30000000/64) +#define BASE_BAUD SPRUCE_BAUD_33M + +#define UART0_INT 3 +#define UART1_INT 4 + +#define STD_UART_OP(num) \ + { 0, BASE_BAUD, 0, UART##num##_INT, \ + ASYNC_BOOT_AUTOCONF, \ + iomem_base: UART##num##_IO_BASE, \ + io_type: SERIAL_IO_MEM}, + +#define SERIAL_PORT_DFNS \ + STD_UART_OP(0) \ + STD_UART_OP(1) + #endif /* __ASM_SPRUCE_H__ */ #endif /* __KERNEL__ */ --- linux-2.6.3-rc2/arch/ppc/platforms/spruce_setup.c 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/ppc/platforms/spruce_setup.c 2004-02-12 00:39:51.000000000 -0800 @@ -28,6 +28,9 @@ #include #include #include +#include +#include +#include #include #include @@ -39,6 +42,7 @@ #include #include #include +#include #include @@ -103,6 +107,46 @@ spruce_show_cpuinfo(struct seq_file *m) return 0; } +static void __init +spruce_early_serial_map(void) +{ + u32 uart_clk; + struct uart_port serial_req; + + if (SPRUCE_UARTCLK_IS_33M(readb(SPRUCE_FPGA_REG_A))) + uart_clk = SPRUCE_BAUD_33M * 16; + else + uart_clk = SPRUCE_BAUD_30M * 16; + + /* Setup serial port access */ + memset(&serial_req, 0, sizeof(serial_req)); + serial_req.uartclk = uart_clk; + serial_req.irq = UART0_INT; + serial_req.flags = ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST; + serial_req.iotype = SERIAL_IO_MEM; + serial_req.membase = (u_char *)UART0_IO_BASE; + serial_req.regshift = 0; + + gen550_init(0, &serial_req); + +#ifdef CONFIG_SERIAL_8250 + if (early_serial_setup(&serial_req) != 0) + printk("Early serial init of port 0 failed\n"); +#endif + + /* Assume early_serial_setup() doesn't modify serial_req */ + serial_req.line = 1; + serial_req.irq = UART1_INT; + serial_req.membase = (u_char *)UART1_IO_BASE; + + gen550_init(1, &serial_req); + +#ifdef CONFIG_SERIAL_8250 + if (early_serial_setup(&serial_req) != 0) + printk("Early serial init of port 1 failed\n"); +#endif +} + TODC_ALLOC(); static void __init @@ -128,10 +172,11 @@ spruce_setup_arch(void) ROOT_DEV = Root_SDA1; #endif -#ifdef CONFIG_DUMMY_CONSOLE +#ifdef CONFIG_VT conswitchp = &dummy_con; #endif + /* Identify the system */ printk(KERN_INFO "System Identification: IBM Spruce\n"); printk(KERN_INFO "Port by MontaVista Software, Inc. (source@mvista.com)\n"); @@ -146,12 +191,12 @@ spruce_restart(char *cmd) /* rfi restores MSR from SRR1 and sets the PC to the SRR0 value */ __asm__ __volatile__ ("\n\ - lis 3,0xfff0 - ori 3,3,0x0100 - mtspr 26,3 - li 3,0 - mtspr 27,3 - rfi + lis 3,0xfff0 \n\ + ori 3,3,0x0100 \n\ + mtspr 26,3 \n\ + li 3,0 \n\ + mtspr 27,3 \n\ + rfi \n\ "); for(;;); } @@ -175,12 +220,27 @@ spruce_map_io(void) 0x08000000, _PAGE_IO); } +/* + * Set BAT 3 to map 0xf8000000 to end of physical memory space 1-to-1. + */ +static __inline__ void +spruce_set_bat(void) +{ + mb(); + mtspr(DBAT1U, 0xf8000ffe); + mtspr(DBAT1L, 0xf800002a); + mb(); +} + void __init platform_init(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7) { parse_bootinfo(find_bootinfo()); + /* Map in board regs, etc. */ + spruce_set_bat(); + isa_io_base = SPRUCE_ISA_IO_BASE; pci_dram_offset = SPRUCE_PCI_SYS_MEM_BASE; @@ -202,4 +262,13 @@ platform_init(unsigned long r3, unsigned ppc_md.nvram_read_val = todc_direct_read_val; ppc_md.nvram_write_val = todc_direct_write_val; + + spruce_early_serial_map(); + +#ifdef CONFIG_SERIAL_TEXT_DEBUG + ppc_md.progress = gen550_progress; +#endif /* CONFIG_SERIAL_TEXT_DEBUG */ +#ifdef CONFIG_KGDB + ppc_md.kgdb_map_scc = gen550_kgdb_map_scc; +#endif } --- linux-2.6.3-rc2/arch/ppc/syslib/Makefile 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/ppc/syslib/Makefile 2004-02-12 00:39:52.000000000 -0800 @@ -33,7 +33,7 @@ obj-$(CONFIG_PPC_OF) += prom_init.o pro obj-$(CONFIG_PPC_PMAC) += open_pic.o indirect_pci.o obj-$(CONFIG_POWER4) += open_pic2.o obj-$(CONFIG_PPC_CHRP) += open_pic.o indirect_pci.o i8259.o -obj-$(CONFIG_PPC_PREP) += open_pic.o indirect_pci.o i8259.o +obj-$(CONFIG_PPC_PREP) += open_pic.o indirect_pci.o i8259.o todc_time.o obj-$(CONFIG_ADIR) += i8259.o indirect_pci.o pci_auto.o \ todc_time.o obj-$(CONFIG_EBONY) += indirect_pci.o pci_auto.o todc_time.o --- linux-2.6.3-rc2/arch/s390/kernel/compat_linux.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/s390/kernel/compat_linux.c 2004-02-12 00:40:55.000000000 -0800 @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -71,17 +72,6 @@ #include "compat_linux.h" -extern asmlinkage long sys_chown(const char *, uid_t,gid_t); -extern asmlinkage long sys_lchown(const char *, uid_t,gid_t); -extern asmlinkage long sys_fchown(unsigned int, uid_t,gid_t); -extern asmlinkage long sys_setregid(gid_t, gid_t); -extern asmlinkage long sys_setgid(gid_t); -extern asmlinkage long sys_setreuid(uid_t, uid_t); -extern asmlinkage long sys_setuid(uid_t); -extern asmlinkage long sys_setresuid(uid_t, uid_t, uid_t); -extern asmlinkage long sys_setresgid(gid_t, gid_t, gid_t); -extern asmlinkage long sys_setfsuid(uid_t); -extern asmlinkage long sys_setfsgid(gid_t); /* For this source file, we want overflow handling. */ @@ -190,40 +180,81 @@ asmlinkage long sys32_setfsgid16(u16 gid return sys_setfsgid((gid_t)gid); } +static int groups16_to_user(u16 *grouplist, struct group_info *group_info) +{ + int i; + u16 group; + + for (i = 0; i < group_info->ngroups; i++) { + group = (u16)GROUP_AT(group_info, i); + if (put_user(group, grouplist+i)) + return -EFAULT; + } + + return 0; +} + +static int groups16_from_user(struct group_info *group_info, u16 *grouplist) +{ + int i; + u16 group; + + for (i = 0; i < group_info->ngroups; i++) { + if (get_user(group, grouplist+i)) + return -EFAULT; + GROUP_AT(group_info, i) = (gid_t)group; + } + + return 0; +} + asmlinkage long sys32_getgroups16(int gidsetsize, u16 *grouplist) { - u16 groups[NGROUPS]; - int i,j; + int i; if (gidsetsize < 0) return -EINVAL; - i = current->ngroups; + + get_group_info(current->group_info); + i = current->group_info->ngroups; if (gidsetsize) { - if (i > gidsetsize) - return -EINVAL; - for(j=0;jgroups[j]; - if (copy_to_user(grouplist, groups, sizeof(u16)*i)) - return -EFAULT; + if (i > gidsetsize) { + i = -EINVAL; + goto out; + } + if (groups16_to_user(grouplist, current->group_info)) { + i = -EFAULT; + goto out; + } } +out: + put_group_info(current->group_info); return i; } asmlinkage long sys32_setgroups16(int gidsetsize, u16 *grouplist) { - u16 groups[NGROUPS]; - int i; + struct group_info *group_info; + int retval; if (!capable(CAP_SETGID)) return -EPERM; - if ((unsigned) gidsetsize > NGROUPS) + if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; - if (copy_from_user(groups, grouplist, gidsetsize * sizeof(u16))) - return -EFAULT; - for (i = 0 ; i < gidsetsize ; i++) - current->groups[i] = (gid_t)groups[i]; - current->ngroups = gidsetsize; - return 0; + + group_info = groups_alloc(gidsetsize); + if (!group_info) + return -ENOMEM; + retval = groups16_from_user(group_info, grouplist); + if (retval) { + put_group_info(group_info); + return retval; + } + + retval = set_current_groups(group_info); + put_group_info(group_info); + + return retval; } asmlinkage long sys32_getuid16(void) @@ -884,9 +915,6 @@ out: return err; } -extern asmlinkage long sys_truncate(const char * path, unsigned long length); -extern asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); - asmlinkage int sys32_truncate64(const char * path, unsigned long high, unsigned long low) { if ((int)high < 0) @@ -1356,8 +1384,6 @@ int cp_compat_stat(struct kstat *stat, s return err; } -extern asmlinkage int sys_sysfs(int option, unsigned long arg1, unsigned long arg2); - asmlinkage int sys32_sysfs(int option, u32 arg1, u32 arg2) { return sys_sysfs(option, arg1, arg2); @@ -1531,9 +1557,7 @@ struct sysinfo32 { char _f[8]; }; -extern asmlinkage int sys_sysinfo(struct sysinfo *info); - -asmlinkage int sys32_sysinfo(struct sysinfo32 *info) +asmlinkage int sys32_sysinfo(struct sysinfo32 __user *info) { struct sysinfo s; int ret, err; @@ -1561,10 +1585,8 @@ asmlinkage int sys32_sysinfo(struct sysi return ret; } -extern asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval); - asmlinkage int sys32_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec *interval) + struct compat_timespec __user *interval) { struct timespec t; int ret; @@ -1578,9 +1600,8 @@ asmlinkage int sys32_sched_rr_get_interv return ret; } -extern asmlinkage int sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, size_t sigsetsize); - -asmlinkage int sys32_rt_sigprocmask(int how, compat_sigset_t *set, compat_sigset_t *oset, compat_size_t sigsetsize) +asmlinkage int sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, compat_size_t sigsetsize) { sigset_t s; compat_sigset_t s32; @@ -1614,9 +1635,8 @@ asmlinkage int sys32_rt_sigprocmask(int return 0; } -extern asmlinkage int sys_rt_sigpending(sigset_t *set, size_t sigsetsize); - -asmlinkage int sys32_rt_sigpending(compat_sigset_t *set, compat_size_t sigsetsize) +asmlinkage int sys32_rt_sigpending(compat_sigset_t __user *set, + compat_size_t sigsetsize) { sigset_t s; compat_sigset_t s32; @@ -1723,11 +1743,8 @@ sys32_rt_sigtimedwait(compat_sigset_t *u return ret; } -extern asmlinkage int -sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo); - asmlinkage int -sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo) +sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 __user *uinfo) { siginfo_t info; int ret; @@ -1956,40 +1973,30 @@ out: #ifdef CONFIG_MODULES -extern asmlinkage int sys_init_module(const char *name_user, struct module *mod_user); - -/* Hey, when you're trying to init module, take time and prepare us a nice 64bit - * module structure, even if from 32bit modutils... Why to pollute kernel... :)) - */ -asmlinkage int sys32_init_module(const char *name_user, struct module *mod_user) +asmlinkage int +sys32_init_module(void __user *umod, unsigned long len, + const char __user *uargs) { - return sys_init_module(name_user, mod_user); + return sys_init_module(umod, len, uargs); } -extern asmlinkage int sys_delete_module(const char *name_user); - -asmlinkage int sys32_delete_module(const char *name_user) +asmlinkage int +sys32_delete_module(const char __user *name_user, unsigned int flags) { - return sys_delete_module(name_user); + return sys_delete_module(name_user, flags); } -struct module_info32 { - u32 addr; - u32 size; - u32 flags; - s32 usecount; -}; - #else /* CONFIG_MODULES */ asmlinkage int -sys32_init_module(const char *name_user, struct module *mod_user) +sys32_init_module(void __user *umod, unsigned long len, + const char __user *uargs) { return -ENOSYS; } asmlinkage int -sys32_delete_module(const char *name_user) +sys32_delete_module(const char __user *name_user, unsigned int flags) { return -ENOSYS; } @@ -2153,10 +2160,6 @@ static int nfs_getfh32_res_trans(union n return copy_to_user(res32, kres, sizeof(*res32)) ? -EFAULT : 0; } -/* -asmlinkage long sys_ni_syscall(void); -*/ - int asmlinkage sys32_nfsservctl(int cmd, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32) { struct nfsctl_arg *karg = NULL; @@ -2271,9 +2274,8 @@ asmlinkage int sys32_settimeofday(struct return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } -asmlinkage int sys_utimes(char *, struct timeval *); - -asmlinkage int sys32_utimes(char *filename, struct compat_timeval *tvs) +asmlinkage int sys32_utimes(char __user *filename, + struct compat_timeval __user *tvs) { char *kfilename; struct timeval ktvs[2]; @@ -2307,8 +2309,6 @@ asmlinkage int sys32_pause(void) return -ERESTARTNOHAND; } -extern asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); asmlinkage int sys32_prctl(int option, u32 arg2, u32 arg3, u32 arg4, u32 arg5) { @@ -2320,12 +2320,6 @@ asmlinkage int sys32_prctl(int option, u } -extern asmlinkage ssize_t sys_pread64(unsigned int fd, char * buf, - size_t count, loff_t pos); - -extern asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char * buf, - size_t count, loff_t pos); - asmlinkage compat_ssize_t sys32_pread64(unsigned int fd, char *ubuf, compat_size_t count, u32 poshi, u32 poslo) { @@ -2342,15 +2336,11 @@ asmlinkage compat_ssize_t sys32_pwrite64 return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -extern asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count); - asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count) { return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count); } -extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t count); - asmlinkage int sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, s32 count) { mm_segment_t old_fs = get_fs(); @@ -2370,9 +2360,6 @@ asmlinkage int sys32_sendfile(int out_fd return ret; } -extern asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, - loff_t *offset, size_t count); - asmlinkage int sys32_sendfile64(int out_fd, int in_fd, compat_loff_t *offset, s32 count) { @@ -2466,8 +2453,6 @@ asmlinkage int sys32_adjtimex(struct tim return ret; } -extern asmlinkage long sys_setpriority(int which, int who, int niceval); - asmlinkage int sys_setpriority32(u32 which, u32 who, u32 niceval) { return sys_setpriority((int) which, @@ -2485,7 +2470,7 @@ struct __sysctl_args32 { u32 __unused[4]; }; -extern asmlinkage long sys32_sysctl(struct __sysctl_args32 *args) +asmlinkage long sys32_sysctl(struct __sysctl_args32 *args) { struct __sysctl_args32 tmp; int error; @@ -2677,8 +2662,6 @@ out: return error; } -asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count); - asmlinkage compat_ssize_t sys32_read(unsigned int fd, char * buf, size_t count) { if ((compat_ssize_t) count < 0) @@ -2687,8 +2670,6 @@ asmlinkage compat_ssize_t sys32_read(uns return sys_read(fd, buf, count); } -asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count); - asmlinkage compat_ssize_t sys32_write(unsigned int fd, char * buf, size_t count) { if ((compat_ssize_t) count < 0) --- linux-2.6.3-rc2/arch/s390/kernel/compat_linux.h 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/s390/kernel/compat_linux.h 2004-02-12 00:40:52.000000000 -0800 @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include --- linux-2.6.3-rc2/arch/s390/kernel/compat_wrapper.S 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/s390/kernel/compat_wrapper.S 2004-02-12 00:40:55.000000000 -0800 @@ -567,13 +567,15 @@ compat_sys_sigprocmask_wrapper: .globl sys32_init_module_wrapper sys32_init_module_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # struct module * + llgtr %r2,%r2 # void * + llgfr %r3,%r3 # unsigned long + llgtr %r4,%r4 # char * jg sys32_init_module # branch to system call .globl sys32_delete_module_wrapper sys32_delete_module_wrapper: llgtr %r2,%r2 # const char * + llgfr %r3,%r3 # unsigned int jg sys32_delete_module # branch to system call .globl sys32_quotactl_wrapper --- linux-2.6.3-rc2/arch/s390/kernel/sys_s390.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/s390/kernel/sys_s390.c 2004-02-12 00:40:55.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -125,8 +126,6 @@ out: return error; } -extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); - #ifndef CONFIG_ARCH_S390X struct sel_arg_struct { unsigned long n; @@ -297,8 +296,6 @@ asmlinkage int sys_ioperm(unsigned long #else /* CONFIG_ARCH_S390X */ -extern asmlinkage int sys_newuname(struct new_utsname * name); - asmlinkage int s390x_newuname(struct new_utsname * name) { int ret = sys_newuname(name); @@ -310,8 +307,6 @@ asmlinkage int s390x_newuname(struct new return ret; } -extern asmlinkage long sys_personality(unsigned long); - asmlinkage int s390x_personality(unsigned long personality) { int ret; @@ -331,8 +326,6 @@ asmlinkage int s390x_personality(unsigne */ #ifndef CONFIG_ARCH_S390X -extern asmlinkage long sys_fadvise64(int, loff_t, size_t, int); - asmlinkage long s390_fadvise64(int fd, u32 offset_high, u32 offset_low, size_t len, int advice) { @@ -342,8 +335,6 @@ s390_fadvise64(int fd, u32 offset_high, #endif -extern asmlinkage long sys_fadvise64_64(int, loff_t, loff_t, int); - struct fadvise64_64_args { int fd; long long offset; --- linux-2.6.3-rc2/arch/sh/Kconfig 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/sh/Kconfig 2004-02-12 00:39:25.000000000 -0800 @@ -940,61 +940,7 @@ config PSMOUSE ) solves this problem, or you can get the "mconv2" utility from . - -menu "Watchdog Cards" - -config WATCHDOG - bool "Watchdog Timer Support" - ---help--- - If you say Y here (and to one of the following options) and create a - character special file /dev/watchdog with major number 10 and minor - number 130 using mknod ("man mknod"), you will get a watchdog, i.e.: - subsequently opening the file and then failing to write to it for - longer than 1 minute will result in rebooting the machine. This - could be useful for a networked machine that needs to come back - online as fast as possible after a lock-up. There's both a watchdog - implementation entirely in software (which can sometimes fail to - reboot the machine) and a driver for hardware watchdog boards, which - are more robust and can also keep track of the temperature inside - your computer. For details, read - in the kernel source. - - The watchdog is usually used together with the watchdog daemon - which is available from - . This daemon can - also monitor NFS connections and can reboot the machine when the process - table is full. - - If unsure, say N. - -config WATCHDOG_NOWAYOUT - bool "Disable watchdog shutdown on close" - depends on WATCHDOG - help - The default watchdog behaviour (which you get if you say N here) is - to stop the timer if the process managing it closes the file - /dev/watchdog. It's always remotely possible that this process might - get killed. If you say Y here, the watchdog cannot be stopped once - it has been started. - -config SH_WDT - tristate "SuperH Watchdog" - depends on WATCHDOG - help - This driver adds watchdog support for the integrated watchdog in the - SuperH processors. If you have one of these processors and wish - to have watchdog support enabled, say Y, otherwise say N. - - As a side note, saying Y here will automatically boost HZ to 1000 - so that the timer has a chance to clear the overflow counter. On - slower systems (such as the SH-2 and SH-3) this will likely yield - some performance issues. As such, the WDT should be avoided here - unless it is absolutely necessary. - - To compile this driver as a module, choose M here: the - module will be called shwdt. - -endmenu +source "drivers/char/watchdog/Kconfig" config RTC tristate "Enhanced Real Time Clock Support" --- linux-2.6.3-rc2/arch/sh/kernel/sys_sh.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/sh/kernel/sys_sh.c 2004-02-12 00:40:55.000000000 -0800 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -240,16 +241,12 @@ asmlinkage int sys_uname(struct old_utsn asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char * buf, size_t count, long dummy, loff_t pos) { - extern asmlinkage ssize_t sys_pread64(unsigned int fd, char * buf, - size_t count, loff_t pos); return sys_pread64(fd, buf, count, pos); } asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char * buf, size_t count, long dummy, loff_t pos) { - extern asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char * buf, - size_t count, loff_t pos); return sys_pwrite64(fd, buf, count, pos); } --- linux-2.6.3-rc2/arch/sparc64/Kconfig 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/sparc64/Kconfig 2004-02-12 00:41:21.000000000 -0800 @@ -639,20 +639,7 @@ source "sound/Kconfig" source "drivers/usb/Kconfig" - -menu "Watchdog" - -config SOFT_WATCHDOG - tristate "Software watchdog" - help - A software monitoring watchdog. This will fail to reboot your system - from some situations that the hardware watchdog will recover - from. Equally it's a lot cheaper to install. - - To compile this driver as a module, choose M here: the - module will be called softdog. - -endmenu +source "drivers/char/watchdog/Kconfig" source "arch/sparc64/oprofile/Kconfig" @@ -731,12 +718,19 @@ config DEBUG_BOOTMEM depends on DEBUG_KERNEL bool "Debug BOOTMEM initialization" +config LOCKMETER + bool "Kernel lock metering" + depends on SMP && !PREEMPT + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + # We have a custom atomic_dec_and_lock() implementation but it's not # compatible with spinlock debugging so we need to fall back on # the generic version in that case. config HAVE_DEC_LOCK bool - depends on SMP && !DEBUG_SPINLOCK + depends on SMP && !DEBUG_SPINLOCK && !LOCKMETER default y config MCOUNT --- linux-2.6.3-rc2/arch/sparc64/kernel/irq.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/sparc64/kernel/irq.c 2004-02-12 00:41:09.000000000 -0800 @@ -1211,7 +1211,7 @@ static int irq_affinity_read_proc (char if (cpus_empty(mask)) mask = cpu_online_map; - len = cpumask_snprintf(page, count, mask); + len = cpumask_scnprintf(page, count, mask); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3-rc2/arch/sparc64/kernel/setup.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/sparc64/kernel/setup.c 2004-02-12 00:40:55.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -71,7 +72,6 @@ struct screen_info screen_info = { void (*prom_palette)(int); void (*prom_keyboard)(void); -asmlinkage void sys_sync(void); /* it's really int */ static void prom_console_write(struct console *con, const char *s, unsigned n) --- linux-2.6.3-rc2/arch/sparc64/kernel/sparc64_ksyms.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/sparc64/kernel/sparc64_ksyms.c 2004-02-12 00:40:55.000000000 -0800 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -84,21 +85,13 @@ extern void syscall_trace(void); extern u32 sunos_sys_table[], sys_call_table32[]; extern void tl0_solaris(void); extern void sys_sigsuspend(void); -extern int sys_getppid(void); -extern int sys_getpid(void); -extern int sys_geteuid(void); -extern int sys_getuid(void); -extern int sys_getegid(void); -extern int sys_getgid(void); extern int svr4_getcontext(svr4_ucontext_t *uc, struct pt_regs *regs); extern int svr4_setcontext(svr4_ucontext_t *uc, struct pt_regs *regs); -extern int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); extern int compat_sys_ioctl(unsigned int fd, unsigned int cmd, u32 arg); extern int (*handle_mathemu)(struct pt_regs *, struct fpustate *); extern long sparc32_open(const char * filename, int flags, int mode); extern int io_remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long offset, unsigned long size, pgprot_t prot, int space); -extern long sys_close(unsigned int); - + extern int __ashrdi3(int, int); extern void dump_thread(struct pt_regs *, struct user *); --- linux-2.6.3-rc2/arch/sparc64/kernel/sunos_ioctl32.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/sparc64/kernel/sunos_ioctl32.c 2004-02-12 00:40:52.000000000 -0800 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -90,8 +91,6 @@ struct ifconf32 { compat_caddr_t ifcbuf; }; -extern asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); - extern asmlinkage int compat_sys_ioctl(unsigned int, unsigned int, u32); extern asmlinkage int sys_setsid(void); --- linux-2.6.3-rc2/arch/sparc64/kernel/sys_sparc32.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/sparc64/kernel/sys_sparc32.c 2004-02-12 00:40:55.000000000 -0800 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -88,17 +89,6 @@ __ret; \ }) -extern asmlinkage long sys_chown(const char *, uid_t,gid_t); -extern asmlinkage long sys_lchown(const char *, uid_t,gid_t); -extern asmlinkage long sys_fchown(unsigned int, uid_t,gid_t); -extern asmlinkage long sys_setregid(gid_t, gid_t); -extern asmlinkage long sys_setgid(gid_t); -extern asmlinkage long sys_setreuid(uid_t, uid_t); -extern asmlinkage long sys_setuid(uid_t); -extern asmlinkage long sys_setresuid(uid_t, uid_t, uid_t); -extern asmlinkage long sys_setresgid(gid_t, gid_t, gid_t); -extern asmlinkage long sys_setfsuid(uid_t); -extern asmlinkage long sys_setfsgid(gid_t); asmlinkage long sys32_chown16(const char * filename, u16 user, u16 group) { @@ -179,40 +169,81 @@ asmlinkage long sys32_setfsgid16(u16 gid return sys_setfsgid((gid_t)gid); } +static int groups16_to_user(u16 *grouplist, struct group_info *group_info) +{ + int i; + u16 group; + + for (i = 0; i < group_info->ngroups; i++) { + group = (u16)GROUP_AT(group_info, i); + if (put_user(group, grouplist+i)) + return -EFAULT; + } + + return 0; +} + +static int groups16_from_user(struct group_info *group_info, u16 *grouplist) +{ + int i; + u16 group; + + for (i = 0; i < group_info->ngroups; i++) { + if (get_user(group, grouplist+i)) + return -EFAULT; + GROUP_AT(group_info, i) = (gid_t)group; + } + + return 0; +} + asmlinkage long sys32_getgroups16(int gidsetsize, u16 *grouplist) { - u16 groups[NGROUPS]; - int i,j; + int i; if (gidsetsize < 0) return -EINVAL; - i = current->ngroups; + + get_group_info(current->group_info); + i = current->group_info->ngroups; if (gidsetsize) { - if (i > gidsetsize) - return -EINVAL; - for(j=0;jgroups[j]; - if (copy_to_user(grouplist, groups, sizeof(u16)*i)) - return -EFAULT; + if (i > gidsetsize) { + i = -EINVAL; + goto out; + } + if (groups16_to_user(grouplist, current->group_info)) { + i = -EFAULT; + goto out; + } } +out: + put_group_info(current->group_info); return i; } asmlinkage long sys32_setgroups16(int gidsetsize, u16 *grouplist) { - u16 groups[NGROUPS]; - int i; + struct group_info *group_info; + int retval; if (!capable(CAP_SETGID)) return -EPERM; - if ((unsigned) gidsetsize > NGROUPS) + if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; - if (copy_from_user(groups, grouplist, gidsetsize * sizeof(u16))) - return -EFAULT; - for (i = 0 ; i < gidsetsize ; i++) - current->groups[i] = (gid_t)groups[i]; - current->ngroups = gidsetsize; - return 0; + + group_info = groups_alloc(gidsetsize); + if (!group_info) + return -ENOMEM; + retval = groups16_from_user(group_info, grouplist); + if (retval) { + put_group_info(group_info); + return retval; + } + + retval = set_current_groups(group_info); + put_group_info(group_info); + + return retval; } asmlinkage long sys32_getuid16(void) @@ -795,9 +826,6 @@ out: return err; } -extern asmlinkage long sys_truncate(const char * path, unsigned long length); -extern asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); - asmlinkage int sys32_truncate64(const char * path, unsigned long high, unsigned long low) { if ((int)high < 0) @@ -1303,8 +1331,6 @@ int cp_compat_stat(struct kstat *stat, s return err; } -extern asmlinkage int sys_sysfs(int option, unsigned long arg1, unsigned long arg2); - asmlinkage int sys32_sysfs(int option, u32 arg1, u32 arg2) { return sys_sysfs(option, arg1, arg2); @@ -1530,8 +1556,6 @@ struct sysinfo32 { char _f[20-2*sizeof(int)-sizeof(int)]; }; -extern asmlinkage int sys_sysinfo(struct sysinfo *info); - asmlinkage int sys32_sysinfo(struct sysinfo32 *info) { struct sysinfo s; @@ -1579,8 +1603,6 @@ asmlinkage int sys32_sysinfo(struct sysi return ret; } -extern asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval); - asmlinkage int sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec *interval) { struct timespec t; @@ -1595,8 +1617,6 @@ asmlinkage int sys32_sched_rr_get_interv return ret; } -extern asmlinkage int sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, size_t sigsetsize); - asmlinkage int sys32_rt_sigprocmask(int how, compat_sigset_t *set, compat_sigset_t *oset, compat_size_t sigsetsize) { sigset_t s; @@ -1631,8 +1651,6 @@ asmlinkage int sys32_rt_sigprocmask(int return 0; } -extern asmlinkage int sys_rt_sigpending(sigset_t *set, size_t sigsetsize); - asmlinkage int sys32_rt_sigpending(compat_sigset_t *set, compat_size_t sigsetsize) { sigset_t s; @@ -1740,9 +1758,6 @@ sys32_rt_sigtimedwait(compat_sigset_t *u return ret; } -extern asmlinkage int -sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo); - asmlinkage int sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo) { @@ -2073,15 +2088,11 @@ out: #ifdef CONFIG_MODULES -extern asmlinkage long sys_init_module(void *, unsigned long, const char *); - asmlinkage int sys32_init_module(void *umod, u32 len, const char *uargs) { return sys_init_module(umod, len, uargs); } -extern asmlinkage long sys_delete_module(const char *, unsigned int); - asmlinkage int sys32_delete_module(const char *name_user, unsigned int flags) { return sys_delete_module(name_user, flags); @@ -2323,7 +2334,6 @@ done: return err; } #else /* !NFSD */ -extern asmlinkage long sys_ni_syscall(void); int asmlinkage sys32_nfsservctl(int cmd, void *notused, void *notused2) { return sys_ni_syscall(); @@ -2416,17 +2426,6 @@ asmlinkage int sys32_pause(void) } /* PCI config space poking. */ -extern asmlinkage int sys_pciconfig_read(unsigned long bus, - unsigned long dfn, - unsigned long off, - unsigned long len, - unsigned char *buf); - -extern asmlinkage int sys_pciconfig_write(unsigned long bus, - unsigned long dfn, - unsigned long off, - unsigned long len, - unsigned char *buf); asmlinkage int sys32_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf) { @@ -2446,9 +2445,6 @@ asmlinkage int sys32_pciconfig_write(u32 (unsigned char *)AA(ubuf)); } -extern asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); - asmlinkage int sys32_prctl(int option, u32 arg2, u32 arg3, u32 arg4, u32 arg5) { return sys_prctl(option, @@ -2459,12 +2455,6 @@ asmlinkage int sys32_prctl(int option, u } -extern asmlinkage ssize_t sys_pread64(unsigned int fd, char * buf, - size_t count, loff_t pos); - -extern asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char * buf, - size_t count, loff_t pos); - asmlinkage compat_ssize_t sys32_pread64(unsigned int fd, char *ubuf, compat_size_t count, u32 poshi, u32 poslo) { @@ -2477,8 +2467,6 @@ asmlinkage compat_ssize_t sys32_pwrite64 return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -extern asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count); - asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count) { return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count); @@ -2495,8 +2483,6 @@ long sys32_fadvise64_64(int fd, u32 offh ((loff_t)AA(lenhi)<<32)|AA(lenlo), advice); } -extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t count); - asmlinkage int sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, s32 count) { mm_segment_t old_fs = get_fs(); @@ -2516,8 +2502,6 @@ asmlinkage int sys32_sendfile(int out_fd return ret; } -extern asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t *offset, size_t count); - asmlinkage int sys32_sendfile64(int out_fd, int in_fd, compat_loff_t *offset, s32 count) { mm_segment_t old_fs = get_fs(); @@ -2692,8 +2676,6 @@ out: return ret; } -extern asmlinkage long sys_setpriority(int which, int who, int niceval); - asmlinkage int sys_setpriority32(u32 which, u32 who, u32 niceval) { return sys_setpriority((int) which, @@ -2749,8 +2731,6 @@ asmlinkage long sys32_sysctl(struct __sy return error; } -extern long sys_lookup_dcookie(u64 cookie64, char *buf, size_t len); - long sys32_lookup_dcookie(u32 cookie_high, u32 cookie_low, char *buf, size_t len) { return sys_lookup_dcookie((u64)cookie_high << 32 | cookie_low, --- linux-2.6.3-rc2/arch/sparc64/kernel/sys_sparc.c 2003-07-13 21:44:34.000000000 -0700 +++ 25/arch/sparc64/kernel/sys_sparc.c 2004-02-12 00:40:57.000000000 -0800 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -161,8 +162,6 @@ unsigned long get_fb_unmapped_area(struc return addr; } -extern asmlinkage unsigned long sys_brk(unsigned long brk); - asmlinkage unsigned long sparc_brk(unsigned long brk) { /* People could try to be nasty and use ta 0x6d in 32bit programs */ @@ -280,8 +279,6 @@ out: return err; } -extern asmlinkage int sys_newuname(struct new_utsname __user *name); - asmlinkage int sparc64_newuname(struct new_utsname __user *name) { int ret = sys_newuname(name); @@ -292,8 +289,6 @@ asmlinkage int sparc64_newuname(struct n return ret; } -extern asmlinkage long sys_personality(unsigned long); - asmlinkage int sparc64_personality(unsigned long personality) { int ret; --- linux-2.6.3-rc2/arch/sparc64/kernel/sys_sunos32.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/sparc64/kernel/sys_sunos32.c 2004-02-12 00:40:55.000000000 -0800 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -584,11 +585,6 @@ struct sunos_nfs_mount_args { char *netname; /* server's netname */ }; -extern asmlinkage int sys_mount(char *, char *, char *, unsigned long, void *); -extern asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr, int addrlen); -extern asmlinkage int sys_socket(int family, int type, int protocol); -extern asmlinkage int sys_bind(int fd, struct sockaddr *umyaddr, int addrlen); - /* Bind the socket on a local reserved port and connect it to the * remote server. This on Linux/i386 is done by the mount program, @@ -781,8 +777,6 @@ out: return ret; } -extern asmlinkage int sys_setsid(void); -extern asmlinkage int sys_setpgid(pid_t, pid_t); asmlinkage int sunos_setpgrp(pid_t pid, pid_t pgid) { @@ -1200,11 +1194,6 @@ static inline int check_nonblock(int ret return ret; } -extern asmlinkage ssize_t sys_read(unsigned int fd, char *buf, unsigned long count); -extern asmlinkage ssize_t sys_write(unsigned int fd, char *buf, unsigned long count); -extern asmlinkage int sys_recv(int fd, void *ubuf, size_t size, unsigned flags); -extern asmlinkage int sys_send(int fd, void *buff, size_t len, unsigned flags); -extern asmlinkage int sys_accept(int fd, struct sockaddr *sa, int *addrlen); extern asmlinkage int sys32_readv(u32 fd, u32 vector, s32 count); extern asmlinkage int sys32_writev(u32 fd, u32 vector, s32 count); @@ -1302,9 +1291,6 @@ asmlinkage int sunos_sigaction (int sig, return ret; } -extern asmlinkage int sys_setsockopt(int fd, int level, int optname, - char *optval, int optlen); - asmlinkage int sunos_setsockopt(int fd, int level, int optname, u32 optval, int optlen) { --- linux-2.6.3-rc2/arch/sparc64/lib/rwlock.S 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/sparc64/lib/rwlock.S 2004-02-12 00:41:21.000000000 -0800 @@ -85,5 +85,20 @@ __write_trylock_succeed: __write_trylock_fail: retl mov 0, %o0 + + .globl __read_trylock +__read_trylock: /* %o0 = lock_ptr */ + ldsw [%o0], %g5 + brlz,pn %g5, 100f + add %g5, 1, %g7 + cas [%o0], %g5, %g7 + cmp %g5, %g7 + bne,pn %icc, __read_trylock + membar #StoreLoad | #StoreStore + retl + mov 1, %o0 +100: retl + mov 0, %o0 + rwlock_impl_end: --- linux-2.6.3-rc2/arch/sparc64/lib/VIScopy.S 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/sparc64/lib/VIScopy.S 2004-02-12 00:39:25.000000000 -0800 @@ -120,7 +120,6 @@ #define EXVIS2(x,y) EXVISN(x,y,2) #define EXVIS3(x,y) EXVISN(x,y,3) #define EXVIS4(x,y) EXVISN(x,y,4) -#define EXVIS5(x,y) EXVISN(x,y,5) #define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9) \ faligndata %f1, %f2, %f48; \ @@ -135,7 +134,7 @@ #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ EXVIS(LDBLK [%src] ASIBLK, %fdest); \ ASI_SETDST_BLK \ - EXVIS2(STBLK %fsrc, [%dest] ASIBLK); \ + EXVIS(STBLK %fsrc, [%dest] ASIBLK); \ add %src, 0x40, %src; \ subcc %len, 0x40, %len; \ be,pn %xcc, jmptgt; \ @@ -156,14 +155,14 @@ #ifdef __KERNEL__ #define STORE_JUMP(dest, fsrc, target) \ srl asi_dest, 3, %g5; \ - EXVIS3(STBLK %fsrc, [%dest] ASIBLK); \ + EXVIS2(STBLK %fsrc, [%dest] ASIBLK); \ xor asi_dest, ASI_BLK_XOR1, asi_dest;\ add %dest, 0x40, %dest; \ xor asi_dest, %g5, asi_dest; \ ba,pt %xcc, target; #else #define STORE_JUMP(dest, fsrc, target) \ - EXVIS3(STBLK %fsrc, [%dest] ASIBLK); \ + EXVIS2(STBLK %fsrc, [%dest] ASIBLK); \ add %dest, 0x40, %dest; \ ba,pt %xcc, target; #endif @@ -182,7 +181,7 @@ subcc %left, 8, %left; \ bl,pn %xcc, vis_out; \ faligndata %f0, %f1, %f48; \ - EXVIS4(STDF %f48, [%dest] ASINORMAL); \ + EXVIS3(STDF %f48, [%dest] ASINORMAL); \ add %dest, 8, %dest; #define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ @@ -675,21 +674,21 @@ vis_out_slk: xor asi_src, %g5, asi_src ! IEU0 Group #endif vis_slk:ASI_SETSRC_NOBLK ! LSU Group - EXVIS4(LDDF [%o1] ASINORMAL, %f2) ! Load Group + EXVIS3(LDDF [%o1] ASINORMAL, %f2) ! Load Group add %o1, 8, %o1 ! IEU0 subcc %g3, 8, %g3 ! IEU1 ASI_SETDST_NOBLK ! LSU Group faligndata %f0, %f2, %f8 ! GRU Group - EXVIS5(STDF %f8, [%o0] ASINORMAL) ! Store + EXVIS4(STDF %f8, [%o0] ASINORMAL) ! Store bl,pn %xcc, vis_out_slp ! CTI add %o0, 8, %o0 ! IEU0 Group ASI_SETSRC_NOBLK ! LSU Group - EXVIS4(LDDF [%o1] ASINORMAL, %f0) ! Load Group + EXVIS3(LDDF [%o1] ASINORMAL, %f0) ! Load Group add %o1, 8, %o1 ! IEU0 subcc %g3, 8, %g3 ! IEU1 ASI_SETDST_NOBLK ! LSU Group faligndata %f2, %f0, %f8 ! GRU Group - EXVIS5(STDF %f8, [%o0] ASINORMAL) ! Store + EXVIS4(STDF %f8, [%o0] ASINORMAL) ! Store bge,pt %xcc, vis_slk ! CTI add %o0, 8, %o0 ! IEU0 Group vis_out_slp: @@ -1138,20 +1137,18 @@ VIScopyfixup4: mov (7 * 16), %g7 sub %g7, %g2, %g7 ba,pt %xcc, VIScopyfixup_ret add %g7, %o2, %o1 -VIScopyfixup_vis3: - sub %o2, 0x80, %o2 VIScopyfixup_vis2: - add %o2, 0x40, %o2 + sub %o2, 0x40, %o2 VIScopyfixup_vis0: add %o2, 0x80, %o2 VIScopyfixup_vis1: add %g7, %g3, %g7 ba,pt %xcc, VIScopyfixup_ret add %o2, %g7, %o1 -VIScopyfixup_vis5: - add %g3, 8, %g3 VIScopyfixup_vis4: add %g3, 8, %g3 +VIScopyfixup_vis3: + add %g3, 8, %g3 ba,pt %xcc, VIScopyfixup_ret add %o2, %g3, %o1 #endif --- linux-2.6.3-rc2/arch/sparc64/prom/printf.c 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/sparc64/prom/printf.c 2004-02-12 00:40:49.000000000 -0800 @@ -40,7 +40,7 @@ prom_printf(char *fmt, ...) int i; va_start(args, fmt); - i = vsnprintf(ppbuf, sizeof(ppbuf), fmt, args); + i = vscnprintf(ppbuf, sizeof(ppbuf), fmt, args); va_end(args); prom_write(ppbuf, i); --- linux-2.6.3-rc2/arch/sparc64/solaris/ioctl.c 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/sparc64/solaris/ioctl.c 2004-02-12 00:40:52.000000000 -0800 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -34,8 +35,6 @@ #include "conv.h" #include "socksys.h" -extern asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg); extern asmlinkage int compat_sys_ioctl(unsigned int fd, unsigned int cmd, u32 arg); asmlinkage int solaris_ioctl(unsigned int fd, unsigned int cmd, u32 arg); --- linux-2.6.3-rc2/arch/sparc64/solaris/socksys.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/sparc64/solaris/socksys.c 2004-02-12 00:40:52.000000000 -0800 @@ -35,9 +35,6 @@ #include "conv.h" #include "socksys.h" -extern asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg); - static int af_inet_protocols[] = { IPPROTO_ICMP, IPPROTO_ICMP, IPPROTO_IGMP, IPPROTO_IPIP, IPPROTO_TCP, IPPROTO_EGP, IPPROTO_PUP, IPPROTO_UDP, IPPROTO_IDP, IPPROTO_RAW, --- linux-2.6.3-rc2/arch/sparc64/solaris/timod.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/sparc64/solaris/timod.c 2004-02-12 00:40:52.000000000 -0800 @@ -27,8 +27,6 @@ #include "conv.h" #include "socksys.h" -extern asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg); asmlinkage int solaris_ioctl(unsigned int fd, unsigned int cmd, u32 arg); static spinlock_t timod_pagelock = SPIN_LOCK_UNLOCKED; --- linux-2.6.3-rc2/arch/sparc/Kconfig 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/sparc/Kconfig 2004-02-12 00:39:25.000000000 -0800 @@ -376,20 +376,7 @@ source "sound/Kconfig" source "drivers/usb/Kconfig" -menu "Watchdog" - -config SOFT_WATCHDOG - tristate "Software watchdog" - help - A software monitoring watchdog. This will fail to reboot your system - from some situations that the hardware watchdog will recover - from. Equally it's a lot cheaper to install. - - To compile this driver as a module, choose M here: the - module will be called softdog. - -endmenu - +source "drivers/char/watchdog/Kconfig" menu "Kernel hacking" --- linux-2.6.3-rc2/arch/sparc/kernel/setup.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/sparc/kernel/setup.c 2004-02-12 00:40:55.000000000 -0800 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -66,7 +67,6 @@ struct screen_info screen_info = { extern unsigned long trapbase; void (*prom_palette)(int); -asmlinkage void sys_sync(void); /* it's really int */ /* Pretty sick eh? */ void prom_sync_me(void) --- linux-2.6.3-rc2/arch/sparc/kernel/sunos_ioctl.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/sparc/kernel/sunos_ioctl.c 2004-02-12 00:40:52.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,6 @@ extern char sunkbd_layout; /* NR_OPEN is now larger and dynamic in recent kernels. */ #define SUNOS_NR_OPEN 256 -extern asmlinkage int sys_ioctl(unsigned int, unsigned int, unsigned long); extern asmlinkage int sys_setsid(void); asmlinkage int sunos_ioctl (int fd, unsigned long cmd, unsigned long arg) --- linux-2.6.3-rc2/arch/sparc/kernel/sys_sparc.c 2003-07-13 21:44:34.000000000 -0700 +++ 25/arch/sparc/kernel/sys_sparc.c 2004-02-12 00:40:57.000000000 -0800 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -78,8 +79,6 @@ unsigned long arch_get_unmapped_area(str } } -extern asmlinkage unsigned long sys_brk(unsigned long brk); - asmlinkage unsigned long sparc_brk(unsigned long brk) { if(ARCH_SUN4C_SUN4) { --- linux-2.6.3-rc2/arch/sparc/kernel/sys_sunos.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/sparc/kernel/sys_sunos.c 2004-02-12 00:40:55.000000000 -0800 @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -564,8 +565,6 @@ asmlinkage int sunos_pathconf(char *path } /* SunOS mount system call emulation */ -extern asmlinkage int -sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp); asmlinkage int sunos_select(int width, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) { @@ -621,11 +620,6 @@ struct sunos_nfs_mount_args { }; -extern asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr, int addrlen); -extern asmlinkage int sys_socket(int family, int type, int protocol); -extern asmlinkage int sys_bind(int fd, struct sockaddr *umyaddr, int addrlen); - - /* Bind the socket on a local reserved port and connect it to the * remote server. This on Linux/i386 is done by the mount program, * not by the kernel. @@ -814,8 +808,6 @@ out: return ret; } -extern asmlinkage int sys_setsid(void); -extern asmlinkage int sys_setpgid(pid_t, pid_t); asmlinkage int sunos_setpgrp(pid_t pid, pid_t pgid) { @@ -1050,15 +1042,6 @@ static inline int check_nonblock(int ret return ret; } -extern asmlinkage ssize_t sys_read(unsigned int fd,char *buf,int count); -extern asmlinkage ssize_t sys_write(unsigned int fd,char *buf,int count); -extern asmlinkage int sys_recv(int fd, void * ubuf, int size, unsigned flags); -extern asmlinkage int sys_send(int fd, void * buff, int len, unsigned flags); -extern asmlinkage int sys_accept(int fd, struct sockaddr *sa, int *addrlen); -extern asmlinkage int sys_readv(unsigned long fd, const struct iovec * vector, long count); -extern asmlinkage int sys_writev(unsigned long fd, const struct iovec * vector, long count); - - asmlinkage int sunos_read(unsigned int fd,char *buf,int count) { int ret; @@ -1164,9 +1147,6 @@ sunos_sigaction(int sig, const struct ol } -extern asmlinkage int sys_setsockopt(int fd, int level, int optname, char *optval, int optlen); -extern asmlinkage int sys_getsockopt(int fd, int level, int optname, char *optval, int *optlen); - asmlinkage int sunos_setsockopt(int fd, int level, int optname, char *optval, int optlen) { --- linux-2.6.3-rc2/arch/sparc/prom/printf.c 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/sparc/prom/printf.c 2004-02-12 00:40:49.000000000 -0800 @@ -39,7 +39,7 @@ prom_printf(char *fmt, ...) int i; va_start(args, fmt); - i = vsnprintf(ppbuf, sizeof(ppbuf), fmt, args); + i = vscnprintf(ppbuf, sizeof(ppbuf), fmt, args); va_end(args); prom_write(ppbuf, i); --- linux-2.6.3-rc2/arch/um/config.release 2003-08-08 22:55:11.000000000 -0700 +++ 25/arch/um/config.release 2004-02-12 00:40:26.000000000 -0800 @@ -228,7 +228,6 @@ CONFIG_ROMFS_FS=m CONFIG_EXT2_FS=y CONFIG_SYSV_FS=m CONFIG_UDF_FS=m -# CONFIG_UDF_RW is not set CONFIG_UFS_FS=m # CONFIG_UFS_FS_WRITE is not set --- linux-2.6.3-rc2/arch/um/defconfig 2003-08-08 22:55:11.000000000 -0700 +++ 25/arch/um/defconfig 2004-02-12 00:40:26.000000000 -0800 @@ -3,29 +3,19 @@ # CONFIG_USERMODE=y CONFIG_MMU=y -CONFIG_SWAP=y CONFIG_UID16=y CONFIG_RWSEM_GENERIC_SPINLOCK=y -CONFIG_CONFIG_LOG_BUF_SHIFT=14 # -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# General Setup +# UML-specific options # CONFIG_MODE_TT=y CONFIG_MODE_SKAS=y CONFIG_NET=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_BINFMT_AOUT=y CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=y CONFIG_HOSTFS=y +CONFIG_HPPFS=y CONFIG_MCONSOLE=y CONFIG_MAGIC_SYSRQ=y # CONFIG_HOST_2G_2G is not set @@ -34,14 +24,43 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_NEST_LEVEL=0 CONFIG_KERNEL_HALF_GIGS=1 # CONFIG_HIGHMEM is not set -CONFIG_PROC_MM=y +# CONFIG_PROC_MM is not set CONFIG_KERNEL_STACK_ORDER=2 +CONFIG_UML_REAL_TIME_CLOCK=y + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +CONFIG_STANDALONE=y +CONFIG_BROKEN_ON_SMP=y + +# +# General setup +# +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SYSCTL=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_IKCONFIG is not set +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y # # Loadable module support # -CONFIG_MODULES=y -# CONFIG_KMOD is not set +# CONFIG_MODULES is not set + +# +# Generic Driver Options +# # # Character Devices @@ -69,6 +88,7 @@ CONFIG_HOSTAUDIO=y # CONFIG_BLK_DEV_UBD=y # CONFIG_BLK_DEV_UBD_SYNC is not set +CONFIG_BLK_DEV_COW_COMMON=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y @@ -78,7 +98,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_NETDEVICES=y # -# Network Devices +# UML Network Devices # CONFIG_UML_NET=y CONFIG_UML_NET_ETHERTAP=y @@ -88,22 +108,6 @@ CONFIG_UML_NET_DAEMON=y CONFIG_UML_NET_MCAST=y # CONFIG_UML_NET_PCAP is not set CONFIG_UML_NET_SLIRP=y -CONFIG_DUMMY=y -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -CONFIG_TUN=y -# CONFIG_ETHERTAP is not set -CONFIG_PPP=y -# CONFIG_PPP_MULTILINK is not set -# CONFIG_PPP_ASYNC is not set -# CONFIG_PPP_SYNC_TTY is not set -# CONFIG_PPP_DEFLATE is not set -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -CONFIG_SLIP=y -# CONFIG_SLIP_COMPRESSED is not set -# CONFIG_SLIP_SMART is not set -# CONFIG_SLIP_MODE_SLIP6 is not set # # Networking support @@ -115,8 +119,6 @@ CONFIG_SLIP=y CONFIG_PACKET=y CONFIG_PACKET_MMAP=y # CONFIG_NETLINK_DEV is not set -# CONFIG_NETFILTER is not set -# CONFIG_FILTER is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y @@ -130,8 +132,11 @@ CONFIG_INET=y # CONFIG_SYN_COOKIES is not set # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set -# CONFIG_XFRM_USER is not set +# CONFIG_INET_IPCOMP is not set # CONFIG_IPV6 is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set +# CONFIG_NETFILTER is not set # # SCTP Configuration (EXPERIMENTAL) @@ -140,9 +145,9 @@ CONFIG_IPV6_SCTP__=y # CONFIG_IP_SCTP is not set # CONFIG_ATM is not set # CONFIG_VLAN_8021Q is not set -# CONFIG_LLC is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set # CONFIG_NET_DIVERT is not set @@ -160,6 +165,10 @@ CONFIG_IPV6_SCTP__=y # Network testing # # CONFIG_NET_PKTGEN is not set +CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=y # # Ethernet (10 or 100Mbit) @@ -171,12 +180,28 @@ CONFIG_IPV6_SCTP__=y # # +# Ethernet (10000 Mbit) +# +CONFIG_PPP=y +# CONFIG_PPP_MULTILINK is not set +# CONFIG_PPP_FILTER is not set +# CONFIG_PPP_ASYNC is not set +# CONFIG_PPP_SYNC_TTY is not set +# CONFIG_PPP_DEFLATE is not set +# CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPPOE is not set +CONFIG_SLIP=y +# CONFIG_SLIP_COMPRESSED is not set +# CONFIG_SLIP_SMART is not set +# CONFIG_SLIP_MODE_SLIP6 is not set + +# # Wireless LAN (non-hamradio) # # CONFIG_NET_RADIO is not set # -# Token Ring devices (depends on LLC=y) +# Token Ring devices # # CONFIG_SHAPER is not set @@ -186,68 +211,101 @@ CONFIG_IPV6_SCTP__=y # CONFIG_WAN is not set # +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# IrDA (infrared) support +# +# CONFIG_IRDA is not set + +# +# Bluetooth support +# +# CONFIG_BT is not set + +# # File systems # +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT3_FS is not set +# CONFIG_JBD is not set +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +# CONFIG_JFS_FS is not set +# CONFIG_XFS_FS is not set +CONFIG_MINIX_FS=y +# CONFIG_ROMFS_FS is not set CONFIG_QUOTA=y # CONFIG_QFMT_V1 is not set # CONFIG_QFMT_V2 is not set CONFIG_QUOTACTL=y -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_AUTOFS_FS=y +CONFIG_AUTOFS4_FS=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_DEVFS_FS=y +CONFIG_DEVFS_MOUNT=y +# CONFIG_DEVFS_DEBUG is not set +CONFIG_DEVPTS_FS=y +# CONFIG_DEVPTS_FS_XATTR is not set +# CONFIG_TMPFS is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y + +# +# Miscellaneous filesystems +# # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_HFS_FS is not set # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set -# CONFIG_EXT3_FS is not set -# CONFIG_JBD is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m # CONFIG_EFS_FS is not set CONFIG_JFFS_FS=y CONFIG_JFFS_FS_VERBOSE=0 -CONFIG_JFFS_PROC_FS=y # CONFIG_JFFS2_FS is not set # CONFIG_CRAMFS is not set -# CONFIG_TMPFS is not set -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=m -# CONFIG_JOLIET is not set -# CONFIG_ZISOFS is not set -# CONFIG_JFS_FS is not set -CONFIG_MINIX_FS=m # CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set # CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -CONFIG_DEVFS_FS=y -CONFIG_DEVFS_MOUNT=y -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y # CONFIG_QNX4FS_FS is not set -# CONFIG_ROMFS_FS is not set -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set # CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set # CONFIG_UFS_FS is not set -# CONFIG_XFS_FS is not set # # Network File Systems # -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set # CONFIG_NFS_FS is not set # CONFIG_NFSD is not set # CONFIG_EXPORTFS is not set -# CONFIG_CIFS is not set # CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set # CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set # CONFIG_AFS_FS is not set # @@ -317,28 +375,7 @@ CONFIG_NLS_DEFAULT="iso8859-1" # # SCSI support # -CONFIG_SCSI=y -CONFIG_GENERIC_ISA_DMA=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_SD_EXTRA_DEVS=40 -CONFIG_CHR_DEV_ST=y -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=2 -CONFIG_CHR_DEV_SG=y - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -CONFIG_SCSI_DEBUG_QUEUES=y -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_DEBUG=y +# CONFIG_SCSI is not set # # Multi-device support (RAID and LVM) @@ -360,6 +397,7 @@ CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y # CONFIG_FTL is not set # CONFIG_NFTL is not set +# CONFIG_INFTL is not set # # RAM/ROM/Flash chip drivers @@ -374,20 +412,21 @@ CONFIG_MTD_BLOCK=y # # Mapping drivers for chip access # +# CONFIG_MTD_COMPLEX_MAPPINGS is not set # # Self-contained MTD device drivers # # CONFIG_MTD_SLRAM is not set # CONFIG_MTD_MTDRAM is not set -CONFIG_MTD_BLKMTD=m +CONFIG_MTD_BLKMTD=y # # Disk-On-Chip Device Drivers # -# CONFIG_MTD_DOC1000 is not set # CONFIG_MTD_DOC2000 is not set # CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOC2001PLUS is not set # # NAND Flash Device Drivers --- linux-2.6.3-rc2/arch/um/drivers/chan_kern.c 2003-06-14 12:18:35.000000000 -0700 +++ 25/arch/um/drivers/chan_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include "chan_kern.h" @@ -265,6 +266,11 @@ static int one_chan_config_string(struct { int n = 0; + if(chan == NULL){ + CONFIG_CHUNK(str, size, n, "none", 1); + return(n); + } + CONFIG_CHUNK(str, size, n, chan->ops->type, 0); if(chan->dev == NULL){ @@ -420,7 +426,8 @@ int parse_chan_pair(char *str, struct li INIT_LIST_HEAD(chans); } - if((out = strchr(str, ',')) != NULL){ + out = strchr(str, ','); + if(out != NULL){ in = str; *out = '\0'; out++; @@ -475,12 +482,15 @@ void chan_interrupt(struct list_head *ch goto out; } err = chan->ops->read(chan->fd, &c, chan->data); - if(err > 0) tty_receive_char(tty, c); + if(err > 0) + tty_receive_char(tty, c); } while(err > 0); + if(err == 0) reactivate_fd(chan->fd, irq); if(err == -EIO){ if(chan->primary){ - if(tty != NULL) tty_hangup(tty); + if(tty != NULL) + tty_hangup(tty); line_disable(dev, irq); close_chan(chans); free_chan(chans); --- linux-2.6.3-rc2/arch/um/drivers/chan_user.c 2003-06-14 12:17:56.000000000 -0700 +++ 25/arch/um/drivers/chan_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -24,29 +23,27 @@ void generic_close(int fd, void *unused) { - close(fd); + os_close_file(fd); } int generic_read(int fd, char *c_out, void *unused) { int n; - n = read(fd, c_out, sizeof(*c_out)); - if(n < 0){ - if(errno == EAGAIN) return(0); - return(-errno); - } - else if(n == 0) return(-EIO); - return(1); + n = os_read_file(fd, c_out, sizeof(*c_out)); + + if(n == -EAGAIN) + return(0); + else if(n == 0) + return(-EIO); + return(n); } +/* XXX Trivial wrapper around os_write_file */ + int generic_write(int fd, const char *buf, int n, void *unused) { - int count; - - count = write(fd, buf, n); - if(count < 0) return(-errno); - return(count); + return(os_write_file(fd, buf, n)); } int generic_console_write(int fd, const char *buf, int n, void *unused) @@ -68,15 +65,18 @@ int generic_console_write(int fd, const int generic_window_size(int fd, void *unused, unsigned short *rows_out, unsigned short *cols_out) { - struct winsize size; - int ret = 0; + int rows, cols; + int ret; + + ret = os_window_size(fd, &rows, &cols); + if(ret < 0) + return(ret); + + ret = ((*rows_out != rows) || (*cols_out != cols)); + + *rows_out = rows; + *cols_out = cols; - if(ioctl(fd, TIOCGWINSZ, &size) == 0){ - ret = ((*rows_out != size.ws_row) || - (*cols_out != size.ws_col)); - *rows_out = size.ws_row; - *cols_out = size.ws_col; - } return(ret); } @@ -100,14 +100,16 @@ static int winch_thread(void *arg) struct winch_data *data = arg; sigset_t sigs; int pty_fd, pipe_fd; + int count, err; char c = 1; - close(data->close_me); + os_close_file(data->close_me); pty_fd = data->pty_fd; pipe_fd = data->pipe_fd; - if(write(pipe_fd, &c, sizeof(c)) != sizeof(c)) + count = os_write_file(pipe_fd, &c, sizeof(c)); + if(count != sizeof(c)) printk("winch_thread : failed to write synchronization " - "byte, errno = %d\n", errno); + "byte, err = %d\n", -count); signal(SIGWINCH, winch_handler); sigfillset(&sigs); @@ -123,26 +125,24 @@ static int winch_thread(void *arg) exit(1); } - if(ioctl(pty_fd, TIOCSCTTY, 0) < 0){ - printk("winch_thread : TIOCSCTTY failed, errno = %d\n", errno); - exit(1); - } - if(tcsetpgrp(pty_fd, os_getpid()) < 0){ - printk("winch_thread : tcsetpgrp failed, errno = %d\n", errno); + err = os_new_tty_pgrp(pty_fd, os_getpid()); + if(err < 0){ + printk("winch_thread : new_tty_pgrp failed, err = %d\n", -err); exit(1); } - if(read(pipe_fd, &c, sizeof(c)) != sizeof(c)) + count = os_read_file(pipe_fd, &c, sizeof(c)); + if(count != sizeof(c)) printk("winch_thread : failed to read synchronization byte, " - "errno = %d\n", errno); + "err = %d\n", -count); while(1){ pause(); - if(write(pipe_fd, &c, sizeof(c)) != sizeof(c)){ - printk("winch_thread : write failed, errno = %d\n", - errno); - } + count = os_write_file(pipe_fd, &c, sizeof(c)); + if(count != sizeof(c)) + printk("winch_thread : write failed, err = %d\n", + -count); } } @@ -154,8 +154,8 @@ static int winch_tramp(int fd, void *dev char c; err = os_pipe(fds, 1, 1); - if(err){ - printk("winch_tramp : os_pipe failed, errno = %d\n", -err); + if(err < 0){ + printk("winch_tramp : os_pipe failed, err = %d\n", -err); return(err); } @@ -168,12 +168,12 @@ static int winch_tramp(int fd, void *dev return(pid); } - close(fds[1]); + os_close_file(fds[1]); *fd_out = fds[0]; - n = read(fds[0], &c, sizeof(c)); + n = os_read_file(fds[0], &c, sizeof(c)); if(n != sizeof(c)){ printk("winch_tramp : failed to read synchronization byte\n"); - printk("read returned %d, errno = %d\n", n, errno); + printk("read failed, err = %d\n", -n); printk("fd %d will not support SIGWINCH\n", fd); *fd_out = -1; } @@ -183,20 +183,24 @@ static int winch_tramp(int fd, void *dev void register_winch(int fd, void *device_data) { int pid, thread, thread_fd; + int count; char c = 1; - if(!isatty(fd)) return; + if(!isatty(fd)) + return; pid = tcgetpgrp(fd); - if(!CHOOSE_MODE(is_tracer_winch(pid, fd, device_data), 0) && - (pid == -1)){ + if(!CHOOSE_MODE_PROC(is_tracer_winch, is_skas_winch, pid, fd, + device_data) && (pid == -1)){ thread = winch_tramp(fd, device_data, &thread_fd); if(fd != -1){ register_winch_irq(thread_fd, fd, thread, device_data); - if(write(thread_fd, &c, sizeof(c)) != sizeof(c)) + count = os_write_file(thread_fd, &c, sizeof(c)); + if(count != sizeof(c)) printk("register_winch : failed to write " - "synchronization byte\n"); + "synchronization byte, err = %d\n", + -count); } } } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/drivers/cow.h 2004-02-12 00:40:26.000000000 -0800 @@ -0,0 +1,41 @@ +#ifndef __COW_H__ +#define __COW_H__ + +#include + +#if __BYTE_ORDER == __BIG_ENDIAN +# define ntohll(x) (x) +# define htonll(x) (x) +#elif __BYTE_ORDER == __LITTLE_ENDIAN +# define ntohll(x) bswap_64(x) +# define htonll(x) bswap_64(x) +#else +#error "__BYTE_ORDER not defined" +#endif + +extern int init_cow_file(int fd, char *cow_file, char *backing_file, + int sectorsize, int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out); + +extern int file_reader(__u64 offset, char *buf, int len, void *arg); +extern int read_cow_header(int (*reader)(__u64, char *, int, void *), + void *arg, __u32 *version_out, + char **backing_file_out, time_t *mtime_out, + __u64 *size_out, int *sectorsize_out, + __u32 *align_out, int *bitmap_offset_out); + +extern int write_cow_header(char *cow_file, int fd, char *backing_file, + int sectorsize, int alignment, long long *size); + +extern void cow_sizes(int version, __u64 size, int sectorsize, int align, + int bitmap_offset, unsigned long *bitmap_len_out, + int *data_offset_out); + +#endif + +/* + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/drivers/cow_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -0,0 +1,630 @@ +#define COW_MAJOR 60 +#define MAJOR_NR COW_MAJOR + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "2_5compat.h" +#include "cow.h" +#include "ubd_user.h" + +#define COW_SHIFT 4 + +struct cow { + int count; + char *cow_path; + dev_t cow_dev; + struct block_device *cow_bdev; + char *backing_path; + dev_t backing_dev; + struct block_device *backing_bdev; + int sectorsize; + unsigned long *bitmap; + unsigned long bitmap_len; + int bitmap_offset; + int data_offset; + devfs_handle_t devfs; + struct semaphore sem; + struct semaphore io_sem; + atomic_t working; + spinlock_t io_lock; + struct buffer_head *bh; + struct buffer_head *bhtail; + void *end_io; +}; + +#define DEFAULT_COW { \ + .count = 0, \ + .cow_path = NULL, \ + .cow_dev = 0, \ + .backing_path = NULL, \ + .backing_dev = 0, \ + .bitmap = NULL, \ + .bitmap_len = 0, \ + .bitmap_offset = 0, \ + .data_offset = 0, \ + .devfs = NULL, \ + .working = ATOMIC_INIT(0), \ + .io_lock = SPIN_LOCK_UNLOCKED, \ +} + +#define MAX_DEV (8) +#define MAX_MINOR (MAX_DEV << COW_SHIFT) + +struct cow cow_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_COW }; + +/* Not modified by this driver */ +static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE }; +static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 }; + +/* Protected by cow_lock */ +static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 }; + +static struct hd_struct cow_part[MAX_MINOR] = + { [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } }; + +/* Protected by io_request_lock */ +static request_queue_t *cow_queue; + +static int cow_open(struct inode *inode, struct file *filp); +static int cow_release(struct inode * inode, struct file * file); +static int cow_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg); +static int cow_revalidate(kdev_t rdev); + +static struct block_device_operations cow_blops = { + .open = cow_open, + .release = cow_release, + .ioctl = cow_ioctl, + .revalidate = cow_revalidate, +}; + +/* Initialized in an initcall, and unchanged thereafter */ +devfs_handle_t cow_dir_handle; + +#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \ +{ \ + .major = maj, \ + .major_name = name, \ + .minor_shift = shift, \ + .max_p = 1 << shift, \ + .part = parts, \ + .sizes = bsizes, \ + .nr_real = max, \ + .real_devices = NULL, \ + .next = NULL, \ + .fops = blops, \ + .de_arr = NULL, \ + .flags = 0 \ +} + +static spinlock_t cow_lock = SPIN_LOCK_UNLOCKED; + +static struct gendisk cow_gendisk = INIT_GENDISK(MAJOR_NR, "cow", cow_part, + COW_SHIFT, sizes, MAX_DEV, + &cow_blops); + +static int cow_add(int n) +{ + struct cow *dev = &cow_dev[n]; + char name[sizeof("nnnnnn\0")]; + int err = -ENODEV; + + if(dev->cow_path == NULL) + goto out; + + sprintf(name, "%d", n); + dev->devfs = devfs_register(cow_dir_handle, name, DEVFS_FL_REMOVABLE, + MAJOR_NR, n << COW_SHIFT, S_IFBLK | + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, + &cow_blops, NULL); + + init_MUTEX_LOCKED(&dev->sem); + init_MUTEX(&dev->io_sem); + + return(0); + + out: + return(err); +} + +/* + * Add buffer_head to back of pending list + */ +static void cow_add_bh(struct cow *cow, struct buffer_head *bh) +{ + unsigned long flags; + + spin_lock_irqsave(&cow->io_lock, flags); + if(cow->bhtail != NULL){ + cow->bhtail->b_reqnext = bh; + cow->bhtail = bh; + } + else { + cow->bh = bh; + cow->bhtail = bh; + } + spin_unlock_irqrestore(&cow->io_lock, flags); +} + +/* +* Grab first pending buffer +*/ +static struct buffer_head *cow_get_bh(struct cow *cow) +{ + struct buffer_head *bh; + + spin_lock_irq(&cow->io_lock); + bh = cow->bh; + if(bh != NULL){ + if(bh == cow->bhtail) + cow->bhtail = NULL; + cow->bh = bh->b_reqnext; + bh->b_reqnext = NULL; + } + spin_unlock_irq(&cow->io_lock); + + return(bh); +} + +static void cow_handle_bh(struct cow *cow, struct buffer_head *bh, + struct buffer_head **cow_bh, int ncow_bh) +{ + int i; + + if(ncow_bh > 0) + ll_rw_block(WRITE, ncow_bh, cow_bh); + + for(i = 0; i < ncow_bh ; i++){ + wait_on_buffer(cow_bh[i]); + brelse(cow_bh[i]); + } + + ll_rw_block(WRITE, 1, &bh); + brelse(bh); +} + +static struct buffer_head *cow_new_bh(struct cow *dev, int sector) +{ + struct buffer_head *bh; + + sector = (dev->bitmap_offset + sector / 8) / dev->sectorsize; + bh = getblk(dev->cow_dev, sector, dev->sectorsize); + memcpy(bh->b_data, dev->bitmap + sector / (8 * sizeof(dev->bitmap[0])), + dev->sectorsize); + return(bh); +} + +/* Copied from loop.c, needed to avoid deadlocking in make_request. */ + +static int cow_thread(void *data) +{ + struct cow *dev = data; + struct buffer_head *bh; + + daemonize(); + exit_files(current); + + sprintf(current->comm, "cow%d", dev - cow_dev); + + spin_lock_irq(¤t->sigmask_lock); + sigfillset(¤t->blocked); + flush_signals(current); + spin_unlock_irq(¤t->sigmask_lock); + + atomic_inc(&dev->working); + + current->policy = SCHED_OTHER; + current->nice = -20; + + current->flags |= PF_NOIO; + + /* + * up sem, we are running + */ + up(&dev->sem); + + for(;;){ + int start, len, nbh, i, update_bitmap = 0; + struct buffer_head *cow_bh[2]; + + down_interruptible(&dev->io_sem); + /* + * could be upped because of tear-down, not because of + * pending work + */ + if(!atomic_read(&dev->working)) + break; + + bh = cow_get_bh(dev); + if(bh == NULL){ + printk(KERN_ERR "cow: missing bh\n"); + continue; + } + + start = bh->b_blocknr * bh->b_size / dev->sectorsize; + len = bh->b_size / dev->sectorsize; + for(i = 0; i < len ; i++){ + if(ubd_test_bit(start + i, + (unsigned char *) dev->bitmap)) + continue; + + update_bitmap = 1; + ubd_set_bit(start + i, (unsigned char *) dev->bitmap); + } + + cow_bh[0] = NULL; + cow_bh[1] = NULL; + nbh = 0; + if(update_bitmap){ + cow_bh[0] = cow_new_bh(dev, start); + nbh++; + if(start / dev->sectorsize != + (start + len) / dev->sectorsize){ + cow_bh[1] = cow_new_bh(dev, start + len); + nbh++; + } + } + + bh->b_dev = dev->cow_dev; + bh->b_blocknr += dev->data_offset / dev->sectorsize; + + cow_handle_bh(dev, bh, cow_bh, nbh); + + /* + * upped both for pending work and tear-down, lo_pending + * will hit zero then + */ + if(atomic_dec_and_test(&dev->working)) + break; + } + + up(&dev->sem); + return(0); +} + +static int cow_make_request(request_queue_t *q, int rw, struct buffer_head *bh) +{ + struct cow *dev; + int n, minor; + + minor = MINOR(bh->b_rdev); + n = minor >> COW_SHIFT; + dev = &cow_dev[n]; + + dev->end_io = NULL; + if(ubd_test_bit(bh->b_rsector, (unsigned char *) dev->bitmap)){ + bh->b_rdev = dev->cow_dev; + bh->b_rsector += dev->data_offset / dev->sectorsize; + } + else if(rw == WRITE){ + bh->b_dev = dev->cow_dev; + bh->b_blocknr += dev->data_offset / dev->sectorsize; + + cow_add_bh(dev, bh); + up(&dev->io_sem); + return(0); + } + else { + bh->b_rdev = dev->backing_dev; + } + + return(1); +} + +int cow_init(void) +{ + int i; + + cow_dir_handle = devfs_mk_dir (NULL, "cow", NULL); + if (devfs_register_blkdev(MAJOR_NR, "cow", &cow_blops)) { + printk(KERN_ERR "cow: unable to get major %d\n", MAJOR_NR); + return -1; + } + read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ + blksize_size[MAJOR_NR] = blk_sizes; + blk_size[MAJOR_NR] = sizes; + INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes); + + cow_queue = BLK_DEFAULT_QUEUE(MAJOR_NR); + blk_init_queue(cow_queue, NULL); + INIT_ELV(cow_queue, &cow_queue->elevator); + blk_queue_make_request(cow_queue, cow_make_request); + + add_gendisk(&cow_gendisk); + + for(i=0;i 0){ + n = (left > blocksize) ? blocksize : left; + + bh = bread(dev, block, (n < 512) ? 512 : n); + if(bh == NULL) + return(-EIO); + + n -= offset; + memcpy(&buf[cur], bh->b_data + offset, n); + block++; + left -= n; + cur += n; + offset = 0; + brelse(bh); + } + + return(count); +} + +static int cow_open(struct inode *inode, struct file *filp) +{ + int (*dev_ioctl)(struct inode *, struct file *, unsigned int, + unsigned long); + mm_segment_t fs; + struct cow *dev; + __u64 size; + __u32 version, align; + time_t mtime; + char *backing_file; + int n, offset, err = 0; + + n = DEVICE_NR(inode->i_rdev); + if(n >= MAX_DEV) + return(-ENODEV); + dev = &cow_dev[n]; + offset = n << COW_SHIFT; + + spin_lock(&cow_lock); + + if(dev->count == 0){ + dev->cow_dev = name_to_kdev_t(dev->cow_path); + if(dev->cow_dev == 0){ + printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") " + "failed\n", dev->cow_path); + err = -ENODEV; + } + + dev->backing_dev = name_to_kdev_t(dev->backing_path); + if(dev->backing_dev == 0){ + printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") " + "failed\n", dev->backing_path); + err = -ENODEV; + } + + if(err) + goto out; + + dev->cow_bdev = bdget(dev->cow_dev); + if(dev->cow_bdev == NULL){ + printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", + dev->cow_path); + err = -ENOMEM; + } + dev->backing_bdev = bdget(dev->backing_dev); + if(dev->backing_bdev == NULL){ + printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", + dev->backing_path); + err = -ENOMEM; + } + + if(err) + goto out; + + err = blkdev_get(dev->cow_bdev, FMODE_READ|FMODE_WRITE, 0, + BDEV_RAW); + if(err){ + printk("cow_open - blkdev_get of COW device failed, " + "error = %d\n", err); + goto out; + } + + err = blkdev_get(dev->backing_bdev, FMODE_READ, 0, BDEV_RAW); + if(err){ + printk("cow_open - blkdev_get of backing device " + "failed, error = %d\n", err); + goto out; + } + + err = read_cow_header(reader, &dev->cow_dev, &version, + &backing_file, &mtime, &size, + &dev->sectorsize, &align, + &dev->bitmap_offset); + if(err){ + printk(KERN_ERR "cow_open - read_cow_header failed, " + "err = %d\n", err); + goto out; + } + + cow_sizes(version, size, dev->sectorsize, align, + dev->bitmap_offset, &dev->bitmap_len, + &dev->data_offset); + dev->bitmap = (void *) vmalloc(dev->bitmap_len); + if(dev->bitmap == NULL){ + err = -ENOMEM; + printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); + goto out; + } + flush_tlb_kernel_vm(); + + err = reader(dev->bitmap_offset, (char *) dev->bitmap, + dev->bitmap_len, &dev->cow_dev); + if(err < 0){ + printk(KERN_ERR "Failed to read COW bitmap\n"); + vfree(dev->bitmap); + goto out; + } + + dev_ioctl = dev->backing_bdev->bd_op->ioctl; + fs = get_fs(); + set_fs(KERNEL_DS); + err = (*dev_ioctl)(inode, filp, BLKGETSIZE, + (unsigned long) &sizes[offset]); + set_fs(fs); + if(err){ + printk(KERN_ERR "cow_open - BLKGETSIZE failed, " + "error = %d\n", err); + goto out; + } + + kernel_thread(cow_thread, dev, + CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + down(&dev->sem); + } + dev->count++; + out: + spin_unlock(&cow_lock); + return(err); +} + +static int cow_release(struct inode * inode, struct file * file) +{ + struct cow *dev; + int n, err; + + n = DEVICE_NR(inode->i_rdev); + if(n >= MAX_DEV) + return(-ENODEV); + dev = &cow_dev[n]; + + spin_lock(&cow_lock); + + if(--dev->count > 0) + goto out; + + err = blkdev_put(dev->cow_bdev, BDEV_RAW); + if(err) + printk("cow_release - blkdev_put of cow device failed, " + "error = %d\n", err); + bdput(dev->cow_bdev); + dev->cow_bdev = 0; + + err = blkdev_put(dev->backing_bdev, BDEV_RAW); + if(err) + printk("cow_release - blkdev_put of backing device failed, " + "error = %d\n", err); + bdput(dev->backing_bdev); + dev->backing_bdev = 0; + + out: + spin_unlock(&cow_lock); + return(0); +} + +static int cow_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg) +{ + struct cow *dev; + int (*dev_ioctl)(struct inode *, struct file *, unsigned int, + unsigned long); + int n; + + n = DEVICE_NR(inode->i_rdev); + if(n >= MAX_DEV) + return(-ENODEV); + dev = &cow_dev[n]; + + dev_ioctl = dev->backing_bdev->bd_op->ioctl; + return((*dev_ioctl)(inode, file, cmd, arg)); +} + +static int cow_revalidate(kdev_t rdev) +{ + printk(KERN_ERR "Need to implement cow_revalidate\n"); + return(0); +} + +static int parse_unit(char **ptr) +{ + char *str = *ptr, *end; + int n = -1; + + if(isdigit(*str)) { + n = simple_strtoul(str, &end, 0); + if(end == str) + return(-1); + *ptr = end; + } + else if (('a' <= *str) && (*str <= 'h')) { + n = *str - 'a'; + str++; + *ptr = str; + } + return(n); +} + +static int cow_setup(char *str) +{ + struct cow *dev; + char *cow_name, *backing_name; + int unit; + + unit = parse_unit(&str); + if(unit < 0){ + printk(KERN_ERR "cow_setup - Couldn't parse unit number\n"); + return(1); + } + + if(*str != '='){ + printk(KERN_ERR "cow_setup - Missing '=' after unit " + "number\n"); + return(1); + } + str++; + + cow_name = str; + backing_name = strchr(str, ','); + if(backing_name == NULL){ + printk(KERN_ERR "cow_setup - missing backing device name\n"); + return(0); + } + *backing_name = '\0'; + backing_name++; + + spin_lock(&cow_lock); + + dev = &cow_dev[unit]; + dev->cow_path = cow_name; + dev->backing_path = backing_name; + + spin_unlock(&cow_lock); + return(0); +} + +__setup("cow", cow_setup); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/drivers/cow_sys.h 2004-02-12 00:40:26.000000000 -0800 @@ -0,0 +1,48 @@ +#ifndef __COW_SYS_H__ +#define __COW_SYS_H__ + +#include "kern_util.h" +#include "user_util.h" +#include "os.h" +#include "user.h" + +static inline void *cow_malloc(int size) +{ + return(um_kmalloc(size)); +} + +static inline void cow_free(void *ptr) +{ + kfree(ptr); +} + +#define cow_printf printk + +static inline char *cow_strdup(char *str) +{ + return(uml_strdup(str)); +} + +static inline int cow_seek_file(int fd, __u64 offset) +{ + return(os_seek_file(fd, offset)); +} + +static inline int cow_file_size(char *file, __u64 *size_out) +{ + return(os_file_size(file, size_out)); +} + +static inline int cow_write_file(int fd, char *buf, int size) +{ + return(os_write_file(fd, buf, size)); +} + +#endif + +/* + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/drivers/cow_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -0,0 +1,375 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "os.h" + +#include "cow.h" +#include "cow_sys.h" + +#define PATH_LEN_V1 256 + +struct cow_header_v1 { + int magic; + int version; + char backing_file[PATH_LEN_V1]; + time_t mtime; + __u64 size; + int sectorsize; +}; + +#define PATH_LEN_V2 MAXPATHLEN + +struct cow_header_v2 { + unsigned long magic; + unsigned long version; + char backing_file[PATH_LEN_V2]; + time_t mtime; + __u64 size; + int sectorsize; +}; + +/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in + * case other systems have different values for MAXPATHLEN + */ +#define PATH_LEN_V3 4096 + +/* Changes from V2 - + * PATH_LEN_V3 as described above + * Explicitly specify field bit lengths for systems with different + * lengths for the usual C types. Not sure whether char or + * time_t should be changed, this can be changed later without + * breaking compatibility + * Add alignment field so that different alignments can be used for the + * bitmap and data + * Add cow_format field to allow for the possibility of different ways + * of specifying the COW blocks. For now, the only value is 0, + * for the traditional COW bitmap. + * Move the backing_file field to the end of the header. This allows + * for the possibility of expanding it into the padding required + * by the bitmap alignment. + * The bitmap and data portions of the file will be aligned as specified + * by the alignment field. This is to allow COW files to be + * put on devices with restrictions on access alignments, such as + * /dev/raw, with a 512 byte alignment restriction. This also + * allows the data to be more aligned more strictly than on + * sector boundaries. This is needed for ubd-mmap, which needs + * the data to be page aligned. + * Fixed (finally!) the rounding bug + */ + +struct cow_header_v3 { + __u32 magic; + __u32 version; + time_t mtime; + __u64 size; + __u32 sectorsize; + __u32 alignment; + __u32 cow_format; + char backing_file[PATH_LEN_V3]; +}; + +/* COW format definitions - for now, we have only the usual COW bitmap */ +#define COW_BITMAP 0 + +union cow_header { + struct cow_header_v1 v1; + struct cow_header_v2 v2; + struct cow_header_v3 v3; +}; + +#define COW_MAGIC 0x4f4f4f4d /* MOOO */ +#define COW_VERSION 3 + +#define DIV_ROUND(x, len) (((x) + (len) - 1) / (len)) +#define ROUND_UP(x, align) DIV_ROUND(x, align) * (align) + +void cow_sizes(int version, __u64 size, int sectorsize, int align, + int bitmap_offset, unsigned long *bitmap_len_out, + int *data_offset_out) +{ + if(version < 3){ + *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); + + *data_offset_out = bitmap_offset + *bitmap_len_out; + *data_offset_out = (*data_offset_out + sectorsize - 1) / + sectorsize; + *data_offset_out *= sectorsize; + } + else { + *bitmap_len_out = DIV_ROUND(size, sectorsize); + *bitmap_len_out = DIV_ROUND(*bitmap_len_out, 8); + + *data_offset_out = bitmap_offset + *bitmap_len_out; + *data_offset_out = ROUND_UP(*data_offset_out, align); + } +} + +static int absolutize(char *to, int size, char *from) +{ + char save_cwd[256], *slash; + int remaining; + + if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { + cow_printf("absolutize : unable to get cwd - errno = %d\n", + errno); + return(-1); + } + slash = strrchr(from, '/'); + if(slash != NULL){ + *slash = '\0'; + if(chdir(from)){ + *slash = '/'; + cow_printf("absolutize : Can't cd to '%s' - " + "errno = %d\n", from, errno); + return(-1); + } + *slash = '/'; + if(getcwd(to, size) == NULL){ + cow_printf("absolutize : unable to get cwd of '%s' - " + "errno = %d\n", from, errno); + return(-1); + } + remaining = size - strlen(to); + if(strlen(slash) + 1 > remaining){ + cow_printf("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return(-1); + } + strcat(to, slash); + } + else { + if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ + cow_printf("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return(-1); + } + strcpy(to, save_cwd); + strcat(to, "/"); + strcat(to, from); + } + chdir(save_cwd); + return(0); +} + +int write_cow_header(char *cow_file, int fd, char *backing_file, + int sectorsize, int alignment, long long *size) +{ + struct cow_header_v3 *header; + unsigned long modtime; + int err; + + err = cow_seek_file(fd, 0); + if(err < 0){ + cow_printf("write_cow_header - lseek failed, err = %d\n", -err); + goto out; + } + + err = -ENOMEM; + header = cow_malloc(sizeof(*header)); + if(header == NULL){ + cow_printf("Failed to allocate COW V3 header\n"); + goto out; + } + header->magic = htonl(COW_MAGIC); + header->version = htonl(COW_VERSION); + + err = -EINVAL; + if(strlen(backing_file) > sizeof(header->backing_file) - 1){ + cow_printf("Backing file name \"%s\" is too long - names are " + "limited to %d characters\n", backing_file, + sizeof(header->backing_file) - 1); + goto out_free; + } + + if(absolutize(header->backing_file, sizeof(header->backing_file), + backing_file)) + goto out_free; + + err = os_file_modtime(header->backing_file, &modtime); + if(err < 0){ + cow_printf("Backing file '%s' mtime request failed, " + "err = %d\n", header->backing_file, -err); + goto out_free; + } + + err = cow_file_size(header->backing_file, size); + if(err < 0){ + cow_printf("Couldn't get size of backing file '%s', " + "err = %d\n", header->backing_file, -err); + goto out_free; + } + + header->mtime = htonl(modtime); + header->size = htonll(*size); + header->sectorsize = htonl(sectorsize); + header->alignment = htonl(alignment); + header->cow_format = COW_BITMAP; + + err = os_write_file(fd, header, sizeof(*header)); + if(err != sizeof(*header)){ + cow_printf("Write of header to new COW file '%s' failed, " + "err = %d\n", cow_file, -err); + goto out_free; + } + err = 0; + out_free: + cow_free(header); + out: + return(err); +} + +int file_reader(__u64 offset, char *buf, int len, void *arg) +{ + int fd = *((int *) arg); + + return(pread(fd, buf, len, offset)); +} + +/* XXX Need to sanity-check the values read from the header */ + +int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, + __u32 *version_out, char **backing_file_out, + time_t *mtime_out, __u64 *size_out, + int *sectorsize_out, __u32 *align_out, + int *bitmap_offset_out) +{ + union cow_header *header; + char *file; + int err, n; + unsigned long version, magic; + + header = cow_malloc(sizeof(*header)); + if(header == NULL){ + cow_printf("read_cow_header - Failed to allocate header\n"); + return(-ENOMEM); + } + err = -EINVAL; + n = (*reader)(0, (char *) header, sizeof(*header), arg); + if(n < offsetof(typeof(header->v1), backing_file)){ + cow_printf("read_cow_header - short header\n"); + goto out; + } + + magic = header->v1.magic; + if(magic == COW_MAGIC) { + version = header->v1.version; + } + else if(magic == ntohl(COW_MAGIC)){ + version = ntohl(header->v1.version); + } + /* No error printed because the non-COW case comes through here */ + else goto out; + + *version_out = version; + + if(version == 1){ + if(n < sizeof(header->v1)){ + cow_printf("read_cow_header - failed to read V1 " + "header\n"); + goto out; + } + *mtime_out = header->v1.mtime; + *size_out = header->v1.size; + *sectorsize_out = header->v1.sectorsize; + *bitmap_offset_out = sizeof(header->v1); + *align_out = *sectorsize_out; + file = header->v1.backing_file; + } + else if(version == 2){ + if(n < sizeof(header->v2)){ + cow_printf("read_cow_header - failed to read V2 " + "header\n"); + goto out; + } + *mtime_out = ntohl(header->v2.mtime); + *size_out = ntohll(header->v2.size); + *sectorsize_out = ntohl(header->v2.sectorsize); + *bitmap_offset_out = sizeof(header->v2); + *align_out = *sectorsize_out; + file = header->v2.backing_file; + } + else if(version == 3){ + if(n < sizeof(header->v3)){ + cow_printf("read_cow_header - failed to read V2 " + "header\n"); + goto out; + } + *mtime_out = ntohl(header->v3.mtime); + *size_out = ntohll(header->v3.size); + *sectorsize_out = ntohl(header->v3.sectorsize); + *align_out = ntohl(header->v3.alignment); + *bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out); + file = header->v3.backing_file; + } + else { + cow_printf("read_cow_header - invalid COW version\n"); + goto out; + } + err = -ENOMEM; + *backing_file_out = cow_strdup(file); + if(*backing_file_out == NULL){ + cow_printf("read_cow_header - failed to allocate backing " + "file\n"); + goto out; + } + err = 0; + out: + cow_free(header); + return(err); +} + +int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize, + int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out) +{ + __u64 size, offset; + char zero = 0; + int err; + + err = write_cow_header(cow_file, fd, backing_file, sectorsize, + alignment, &size); + if(err) + goto out; + + *bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment); + cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out, + bitmap_len_out, data_offset_out); + + offset = *data_offset_out + size - sizeof(zero); + err = cow_seek_file(fd, offset); + if(err < 0){ + cow_printf("cow bitmap lseek failed : err = %d\n", -err); + goto out; + } + + /* does not really matter how much we write it is just to set EOF + * this also sets the entire COW bitmap + * to zero without having to allocate it + */ + err = cow_write_file(fd, &zero, sizeof(zero)); + if(err != sizeof(zero)){ + cow_printf("Write of bitmap to new COW file '%s' failed, " + "err = %d\n", cow_file, -err); + err = -EINVAL; + goto out; + } + + return(0); + + out: + return(err); +} + +/* + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.3-rc2/arch/um/drivers/daemon_user.c 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/um/drivers/daemon_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -53,7 +53,8 @@ static int connect_to_switch(struct daem struct request_v3 req; int fd, n, err; - if((pri->control = socket(AF_UNIX, SOCK_STREAM, 0)) < 0){ + pri->control = socket(AF_UNIX, SOCK_STREAM, 0); + if(pri->control < 0){ printk("daemon_open : control socket failed, errno = %d\n", errno); return(-errno); @@ -67,7 +68,8 @@ static int connect_to_switch(struct daem goto out; } - if((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0){ + fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if(fd < 0){ printk("daemon_open : data socket failed, errno = %d\n", errno); err = -errno; @@ -91,18 +93,18 @@ static int connect_to_switch(struct daem req.version = SWITCH_VERSION; req.type = REQ_NEW_CONTROL; req.sock = *local_addr; - n = write(pri->control, &req, sizeof(req)); + n = os_write_file(pri->control, &req, sizeof(req)); if(n != sizeof(req)){ - printk("daemon_open : control setup request returned %d, " - "errno = %d\n", n, errno); + printk("daemon_open : control setup request failed, err = %d\n", + -n); err = -ENOTCONN; goto out; } - n = read(pri->control, sun, sizeof(*sun)); + n = os_read_file(pri->control, sun, sizeof(*sun)); if(n != sizeof(*sun)){ - printk("daemon_open : read of data socket returned %d, " - "errno = %d\n", n, errno); + printk("daemon_open : read of data socket failed, err = %d\n", + -n); err = -ENOTCONN; goto out_close; } @@ -111,9 +113,9 @@ static int connect_to_switch(struct daem return(fd); out_close: - close(fd); + os_close_file(fd); out: - close(pri->control); + os_close_file(pri->control); return(err); } @@ -153,8 +155,8 @@ static void daemon_remove(void *data) { struct daemon_data *pri = data; - close(pri->fd); - close(pri->control); + os_close_file(pri->fd); + os_close_file(pri->control); if(pri->data_addr != NULL) kfree(pri->data_addr); if(pri->ctl_addr != NULL) kfree(pri->ctl_addr); if(pri->local_addr != NULL) kfree(pri->local_addr); --- linux-2.6.3-rc2/arch/um/drivers/fd.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/um/drivers/fd.c 2004-02-12 00:40:26.000000000 -0800 @@ -35,7 +35,8 @@ void *fd_init(char *str, int device, str printk("fd_init : couldn't parse file descriptor '%s'\n", str); return(NULL); } - if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); + data = um_kmalloc(sizeof(*data)); + if(data == NULL) return(NULL); *data = ((struct fd_chan) { .fd = n, .raw = opts->raw }); return(data); --- linux-2.6.3-rc2/arch/um/drivers/harddog_user.c 2003-06-14 12:18:06.000000000 -0700 +++ 25/arch/um/drivers/harddog_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -27,10 +27,10 @@ static void pre_exec(void *d) dup2(data->stdin, 0); dup2(data->stdout, 1); dup2(data->stdout, 2); - close(data->stdin); - close(data->stdout); - close(data->close_me[0]); - close(data->close_me[1]); + os_close_file(data->stdin); + os_close_file(data->stdout); + os_close_file(data->close_me[0]); + os_close_file(data->close_me[1]); } int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock) @@ -44,15 +44,15 @@ int start_watchdog(int *in_fd_ret, int * char **args = NULL; err = os_pipe(in_fds, 1, 0); - if(err){ - printk("harddog_open - os_pipe failed, errno = %d\n", -err); - return(err); + if(err < 0){ + printk("harddog_open - os_pipe failed, err = %d\n", -err); + goto out; } err = os_pipe(out_fds, 1, 0); - if(err){ - printk("harddog_open - os_pipe failed, errno = %d\n", -err); - return(err); + if(err < 0){ + printk("harddog_open - os_pipe failed, err = %d\n", -err); + goto out_close_in; } data.stdin = out_fds[0]; @@ -72,42 +72,47 @@ int start_watchdog(int *in_fd_ret, int * pid = run_helper(pre_exec, &data, args, NULL); - close(out_fds[0]); - close(in_fds[1]); + os_close_file(out_fds[0]); + os_close_file(in_fds[1]); if(pid < 0){ err = -pid; - printk("harddog_open - run_helper failed, errno = %d\n", err); - goto out; + printk("harddog_open - run_helper failed, errno = %d\n", -err); + goto out_close_out; } - n = read(in_fds[0], &c, sizeof(c)); + n = os_read_file(in_fds[0], &c, sizeof(c)); if(n == 0){ printk("harddog_open - EOF on watchdog pipe\n"); helper_wait(pid); err = -EIO; - goto out; + goto out_close_out; } else if(n < 0){ printk("harddog_open - read of watchdog pipe failed, " - "errno = %d\n", errno); + "err = %d\n", -n); helper_wait(pid); - err = -errno; - goto out; + err = n; + goto out_close_out; } *in_fd_ret = in_fds[0]; *out_fd_ret = out_fds[1]; return(0); + + out_close_in: + os_close_file(in_fds[0]); + os_close_file(in_fds[1]); + out_close_out: + os_close_file(out_fds[0]); + os_close_file(out_fds[1]); out: - close(out_fds[1]); - close(in_fds[0]); return(err); } void stop_watchdog(int in_fd, int out_fd) { - close(in_fd); - close(out_fd); + os_close_file(in_fd); + os_close_file(out_fd); } int ping_watchdog(int fd) @@ -115,11 +120,12 @@ int ping_watchdog(int fd) int n; char c = '\n'; - n = write(fd, &c, sizeof(c)); - if(n < sizeof(c)){ - printk("ping_watchdog - write failed, errno = %d\n", - errno); - return(-errno); + n = os_write_file(fd, &c, sizeof(c)); + if(n != sizeof(c)){ + printk("ping_watchdog - write failed, err = %d\n", -n); + if(n < 0) + return(n); + return(-EIO); } return 1; --- linux-2.6.3-rc2/arch/um/drivers/hostaudio_kern.c 2003-06-14 12:18:35.000000000 -0700 +++ 25/arch/um/drivers/hostaudio_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -5,12 +5,12 @@ #include "linux/config.h" #include "linux/module.h" -#include "linux/version.h" #include "linux/init.h" #include "linux/slab.h" #include "linux/fs.h" #include "linux/sound.h" #include "linux/soundcard.h" +#include "asm/uaccess.h" #include "kern_util.h" #include "init.h" #include "hostaudio.h" @@ -19,30 +19,39 @@ char *dsp = HOSTAUDIO_DEV_DSP; char *mixer = HOSTAUDIO_DEV_MIXER; +#define DSP_HELP \ +" This is used to specify the host dsp device to the hostaudio driver.\n" \ +" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" + +#define MIXER_HELP \ +" This is used to specify the host mixer device to the hostaudio driver.\n" \ +" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" + #ifndef MODULE static int set_dsp(char *name, int *add) { - dsp = uml_strdup(name); + dsp = name; return(0); } -__uml_setup("dsp=", set_dsp, -"dsp=\n" -" This is used to specify the host dsp device to the hostaudio driver.\n" -" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" -); +__uml_setup("dsp=", set_dsp, "dsp=\n" DSP_HELP); static int set_mixer(char *name, int *add) { - mixer = uml_strdup(name); + mixer = name; return(0); } -__uml_setup("mixer=", set_mixer, -"mixer=\n" -" This is used to specify the host mixer device to the hostaudio driver.\n" -" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" -); +__uml_setup("mixer=", set_mixer, "mixer=\n" MIXER_HELP); + +#else /*MODULE*/ + +MODULE_PARM(dsp, "s"); +MODULE_PARM_DESC(dsp, DSP_HELP); + +MODULE_PARM(mixer, "s"); +MODULE_PARM_DESC(mixer, MIXER_HELP); + #endif /* /dev/dsp file operations */ @@ -51,23 +60,55 @@ static ssize_t hostaudio_read(struct fil loff_t *ppos) { struct hostaudio_state *state = file->private_data; + void *kbuf; + int err; #ifdef DEBUG printk("hostaudio: read called, count = %d\n", count); #endif - return(hostaudio_read_user(state, buffer, count, ppos)); + kbuf = kmalloc(count, GFP_KERNEL); + if(kbuf == NULL) + return(-ENOMEM); + + err = hostaudio_read_user(state, kbuf, count, ppos); + if(err < 0) + goto out; + + if(copy_to_user(buffer, kbuf, err)) + err = -EFAULT; + + out: + kfree(kbuf); + return(err); } static ssize_t hostaudio_write(struct file *file, const char *buffer, size_t count, loff_t *ppos) { struct hostaudio_state *state = file->private_data; + void *kbuf; + int err; #ifdef DEBUG printk("hostaudio: write called, count = %d\n", count); #endif - return(hostaudio_write_user(state, buffer, count, ppos)); + + kbuf = kmalloc(count, GFP_KERNEL); + if(kbuf == NULL) + return(-ENOMEM); + + err = -EFAULT; + if(copy_from_user(kbuf, buffer, count)) + goto out; + + err = hostaudio_write_user(state, kbuf, count, ppos); + if(err < 0) + goto out; + + out: + kfree(kbuf); + return(err); } static unsigned int hostaudio_poll(struct file *file, @@ -86,12 +127,43 @@ static int hostaudio_ioctl(struct inode unsigned int cmd, unsigned long arg) { struct hostaudio_state *state = file->private_data; + unsigned long data = 0; + int err; #ifdef DEBUG printk("hostaudio: ioctl called, cmd = %u\n", cmd); #endif + switch(cmd){ + case SNDCTL_DSP_SPEED: + case SNDCTL_DSP_STEREO: + case SNDCTL_DSP_GETBLKSIZE: + case SNDCTL_DSP_CHANNELS: + case SNDCTL_DSP_SUBDIVIDE: + case SNDCTL_DSP_SETFRAGMENT: + if(get_user(data, (int *) arg)) + return(-EFAULT); + break; + default: + break; + } + + err = hostaudio_ioctl_user(state, cmd, (unsigned long) &data); + + switch(cmd){ + case SNDCTL_DSP_SPEED: + case SNDCTL_DSP_STEREO: + case SNDCTL_DSP_GETBLKSIZE: + case SNDCTL_DSP_CHANNELS: + case SNDCTL_DSP_SUBDIVIDE: + case SNDCTL_DSP_SETFRAGMENT: + if(put_user(data, (int *) arg)) + return(-EFAULT); + break; + default: + break; + } - return(hostaudio_ioctl_user(state, cmd, arg)); + return(err); } static int hostaudio_open(struct inode *inode, struct file *file) @@ -225,7 +297,8 @@ MODULE_LICENSE("GPL"); static int __init hostaudio_init_module(void) { - printk(KERN_INFO "UML Audio Relay\n"); + printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n", + dsp, mixer); module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1); if(module_data.dev_audio < 0){ --- linux-2.6.3-rc2/arch/um/drivers/hostaudio_user.c 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/drivers/hostaudio_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -4,9 +4,6 @@ */ #include -#include -#include -#include #include #include #include "hostaudio.h" @@ -20,45 +17,31 @@ ssize_t hostaudio_read_user(struct hostaudio_state *state, char *buffer, size_t count, loff_t *ppos) { - ssize_t ret; - #ifdef DEBUG printk("hostaudio: read_user called, count = %d\n", count); #endif - ret = read(state->fd, buffer, count); - - if(ret < 0) return(-errno); - return(ret); + return(os_read_file(state->fd, buffer, count)); } ssize_t hostaudio_write_user(struct hostaudio_state *state, const char *buffer, size_t count, loff_t *ppos) { - ssize_t ret; - #ifdef DEBUG printk("hostaudio: write_user called, count = %d\n", count); #endif - ret = write(state->fd, buffer, count); - - if(ret < 0) return(-errno); - return(ret); + return(os_write_file(state->fd, buffer, count)); } int hostaudio_ioctl_user(struct hostaudio_state *state, unsigned int cmd, unsigned long arg) { - int ret; #ifdef DEBUG printk("hostaudio: ioctl_user called, cmd = %u\n", cmd); #endif - ret = ioctl(state->fd, cmd, arg); - - if(ret < 0) return(-errno); - return(ret); + return(os_ioctl_generic(state->fd, cmd, arg)); } int hostaudio_open_user(struct hostaudio_state *state, int r, int w, char *dsp) @@ -67,14 +50,15 @@ int hostaudio_open_user(struct hostaudio printk("hostaudio: open_user called\n"); #endif - state->fd = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); - - if(state->fd >= 0) return(0); + state->fd = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); - printk("hostaudio_open_user failed to open '%s', errno = %d\n", - dsp, errno); + if(state->fd < 0) { + printk("hostaudio_open_user failed to open '%s', err = %d\n", + dsp, -state->fd); + return(state->fd); + } - return(-errno); + return(0); } int hostaudio_release_user(struct hostaudio_state *state) @@ -82,10 +66,10 @@ int hostaudio_release_user(struct hostau #ifdef DEBUG printk("hostaudio: release called\n"); #endif - if(state->fd >= 0){ - close(state->fd); - state->fd=-1; - } + if(state->fd >= 0){ + os_close_file(state->fd); + state->fd = -1; + } return(0); } @@ -95,15 +79,11 @@ int hostaudio_release_user(struct hostau int hostmixer_ioctl_mixdev_user(struct hostmixer_state *state, unsigned int cmd, unsigned long arg) { - int ret; #ifdef DEBUG printk("hostmixer: ioctl_user called cmd = %u\n",cmd); #endif - ret = ioctl(state->fd, cmd, arg); - if(ret < 0) - return(-errno); - return(ret); + return(os_ioctl_generic(state->fd, cmd, arg)); } int hostmixer_open_mixdev_user(struct hostmixer_state *state, int r, int w, @@ -115,12 +95,13 @@ int hostmixer_open_mixdev_user(struct ho state->fd = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0); - if(state->fd >= 0) return(0); - - printk("hostaudio_open_mixdev_user failed to open '%s', errno = %d\n", - mixer, errno); + if(state->fd < 0) { + printk("hostaudio_open_mixdev_user failed to open '%s', " + "err = %d\n", mixer, state->fd); + return(state->fd); + } - return(-errno); + return(0); } int hostmixer_release_mixdev_user(struct hostmixer_state *state) @@ -130,7 +111,7 @@ int hostmixer_release_mixdev_user(struct #endif if(state->fd >= 0){ - close(state->fd); + os_close_file(state->fd); state->fd = -1; } --- linux-2.6.3-rc2/arch/um/drivers/line.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/um/drivers/line.c 2004-02-12 00:40:26.000000000 -0800 @@ -6,8 +6,8 @@ #include "linux/sched.h" #include "linux/slab.h" #include "linux/list.h" +#include "linux/interrupt.h" #include "linux/devfs_fs_kernel.h" -#include "asm/irq.h" #include "asm/uaccess.h" #include "chan_kern.h" #include "irq_user.h" @@ -16,38 +16,55 @@ #include "user_util.h" #include "kern_util.h" #include "os.h" +#include "irq_kern.h" #define LINE_BUFSIZE 4096 -void line_interrupt(int irq, void *data, struct pt_regs *unused) +static irqreturn_t line_interrupt(int irq, void *data, struct pt_regs *unused) { struct line *dev = data; if(dev->count > 0) chan_interrupt(&dev->chan_list, &dev->task, dev->tty, irq, dev); + return IRQ_HANDLED; } -void line_timer_cb(void *arg) +static void line_timer_cb(void *arg) { struct line *dev = arg; line_interrupt(dev->driver->read_irq, dev, NULL); } -static void buffer_data(struct line *line, const char *buf, int len) +static int write_room(struct line *dev) { - int end; + int n; + + if(dev->buffer == NULL) return(LINE_BUFSIZE - 1); + + n = dev->head - dev->tail; + if(n <= 0) n = LINE_BUFSIZE + n; + return(n - 1); +} + +static int buffer_data(struct line *line, const char *buf, int len) +{ + int end, room; if(line->buffer == NULL){ line->buffer = kmalloc(LINE_BUFSIZE, GFP_ATOMIC); if(line->buffer == NULL){ printk("buffer_data - atomic allocation failed\n"); - return; + return(0); } line->head = line->buffer; line->tail = line->buffer; } + + room = write_room(line); + len = (len > room) ? room : len; + end = line->buffer + LINE_BUFSIZE - line->tail; if(len < end){ memcpy(line->tail, buf, len); @@ -60,6 +77,8 @@ static void buffer_data(struct line *lin memcpy(line->buffer, buf, len); line->tail = line->buffer + len; } + + return(len); } static int flush_buffer(struct line *line) @@ -95,7 +114,7 @@ int line_write(struct line *lines, struc struct line *line; char *new; unsigned long flags; - int n, err, i; + int n, err, i, ret = 0; if(tty->stopped) return 0; @@ -104,9 +123,13 @@ int line_write(struct line *lines, struc if(new == NULL) return(0); n = copy_from_user(new, buf, len); - if(n == len) - return(-EFAULT); buf = new; + if(n == len){ + len = -EFAULT; + goto out_free; + } + + len -= n; } i = tty->index; @@ -115,41 +138,50 @@ int line_write(struct line *lines, struc down(&line->sem); if(line->head != line->tail){ local_irq_save(flags); - buffer_data(line, buf, len); + ret += buffer_data(line, buf, len); err = flush_buffer(line); local_irq_restore(flags); if(err <= 0) - goto out; + goto out_up; } else { n = write_chan(&line->chan_list, buf, len, line->driver->write_irq); if(n < 0){ - len = n; - goto out; + ret = n; + goto out_up; } - if(n < len) - buffer_data(line, buf + n, len - n); + + len -= n; + ret += n; + if(len > 0) + ret += buffer_data(line, buf + n, len); } - out: + out_up: up(&line->sem); - return(len); + out_free: + if(from_user) + kfree(buf); + return(ret); } -void line_write_interrupt(int irq, void *data, struct pt_regs *unused) +static irqreturn_t line_write_interrupt(int irq, void *data, + struct pt_regs *unused) { struct line *dev = data; struct tty_struct *tty = dev->tty; int err; err = flush_buffer(dev); - if(err == 0) return; + if(err == 0) + return(IRQ_NONE); else if(err < 0){ dev->head = dev->buffer; dev->tail = dev->buffer; } - if(tty == NULL) return; + if(tty == NULL) + return(IRQ_NONE); if(test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && (tty->ldisc.write_wakeup != NULL)) @@ -161,21 +193,9 @@ void line_write_interrupt(int irq, void * writes. */ - if (waitqueue_active(&tty->write_wait)) + if(waitqueue_active(&tty->write_wait)) wake_up_interruptible(&tty->write_wait); - -} - -int line_write_room(struct tty_struct *tty) -{ - struct line *dev = tty->driver_data; - int n; - - if(dev->buffer == NULL) return(LINE_BUFSIZE - 1); - - n = dev->head - dev->tail; - if(n <= 0) n = LINE_BUFSIZE + n; - return(n - 1); + return(IRQ_HANDLED); } int line_setup_irq(int fd, int input, int output, void *data) @@ -305,7 +325,7 @@ int line_setup(struct line *lines, int n if(*end != '='){ printk(KERN_ERR "line_setup failed to parse \"%s\"\n", init); - return(1); + return(0); } init = end; } @@ -313,12 +333,12 @@ int line_setup(struct line *lines, int n if((n >= 0) && (n >= num)){ printk("line_setup - %d out of range ((0 ... %d) allowed)\n", n, num); - return(1); + return(0); } else if(n >= 0){ if(lines[n].count > 0){ printk("line_setup - device %d is open\n", n); - return(1); + return(0); } if(lines[n].init_pri <= INIT_ONE){ lines[n].init_pri = INIT_ONE; @@ -332,7 +352,7 @@ int line_setup(struct line *lines, int n else if(!all_allowed){ printk("line_setup - can't configure all devices from " "mconsole\n"); - return(1); + return(0); } else { for(i = 0; i < num; i++){ @@ -346,7 +366,7 @@ int line_setup(struct line *lines, int n } } } - return(0); + return(1); } int line_config(struct line *lines, int num, char *str) @@ -357,7 +377,7 @@ int line_config(struct line *lines, int printk("line_config - uml_strdup failed\n"); return(-ENOMEM); } - return(line_setup(lines, num, new, 0)); + return(!line_setup(lines, num, new, 0)); } int line_get_config(char *name, struct line *lines, int num, char *str, @@ -369,7 +389,7 @@ int line_get_config(char *name, struct l dev = simple_strtoul(name, &end, 0); if((*end != '\0') || (end == name)){ - *error_out = "line_setup failed to parse device number"; + *error_out = "line_get_config failed to parse device number"; return(0); } @@ -379,15 +399,15 @@ int line_get_config(char *name, struct l } line = &lines[dev]; + down(&line->sem); - if(!line->valid) CONFIG_CHUNK(str, size, n, "none", 1); else if(line->count == 0) CONFIG_CHUNK(str, size, n, line->init_str, 1); else n = chan_config_string(&line->chan_list, str, size, error_out); - up(&line->sem); + return(n); } @@ -396,7 +416,14 @@ int line_remove(struct line *lines, int char config[sizeof("conxxxx=none\0")]; sprintf(config, "%s=none", str); - return(line_setup(lines, num, config, 0)); + return(!line_setup(lines, num, config, 0)); +} + +int line_write_room(struct tty_struct *tty) +{ + struct line *dev = tty->driver_data; + + return(write_room(dev)); } struct tty_driver *line_register_devfs(struct lines *set, @@ -412,7 +439,8 @@ struct tty_driver *line_register_devfs(s return NULL; driver->driver_name = line_driver->name; - driver->name = line_driver->devfs_name; + driver->name = line_driver->device_name; + driver->devfs_name = line_driver->devfs_name; driver->major = line_driver->major; driver->minor_start = line_driver->minor_start; driver->type = line_driver->type; @@ -432,7 +460,7 @@ struct tty_driver *line_register_devfs(s for(i = 0; i < nlines; i++){ if(!lines[i].valid) - tty_unregister_devfs(driver, i); + tty_unregister_device(driver, i); } mconsole_register_dev(&line_driver->mc); @@ -465,24 +493,25 @@ struct winch { struct line *line; }; -void winch_interrupt(int irq, void *data, struct pt_regs *unused) +irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused) { struct winch *winch = data; struct tty_struct *tty; int err; char c; - err = generic_read(winch->fd, &c, NULL); - if(err < 0){ - if(err != -EAGAIN){ - printk("winch_interrupt : read failed, errno = %d\n", - -err); - printk("fd %d is losing SIGWINCH support\n", - winch->tty_fd); - free_irq(irq, data); - return; + if(winch->fd != -1){ + err = generic_read(winch->fd, &c, NULL); + if(err < 0){ + if(err != -EAGAIN){ + printk("winch_interrupt : read failed, " + "errno = %d\n", -err); + printk("fd %d is losing SIGWINCH support\n", + winch->tty_fd); + return(IRQ_HANDLED); + } + goto out; } - goto out; } tty = winch->line->tty; if(tty != NULL){ @@ -492,7 +521,9 @@ void winch_interrupt(int irq, void *data kill_pg(tty->pgrp, SIGWINCH, 1); } out: - reactivate_fd(winch->fd, WINCH_IRQ); + if(winch->fd != -1) + reactivate_fd(winch->fd, WINCH_IRQ); + return(IRQ_HANDLED); } DECLARE_MUTEX(winch_handler_sem); @@ -529,7 +560,10 @@ static void winch_cleanup(void) list_for_each(ele, &winch_handlers){ winch = list_entry(ele, struct winch, list); - close(winch->fd); + if(winch->fd != -1){ + deactivate_fd(winch->fd, WINCH_IRQ); + os_close_file(winch->fd); + } if(winch->pid != -1) os_kill_process(winch->pid, 1); } --- linux-2.6.3-rc2/arch/um/drivers/Makefile 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/um/drivers/Makefile 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,5 @@ # -# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) +# Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com) # Licensed under the GPL # @@ -39,6 +39,8 @@ obj-$(CONFIG_PTY_CHAN) += pty.o obj-$(CONFIG_TTY_CHAN) += tty.o obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o obj-$(CONFIG_UML_WATCHDOG) += harddog.o +obj-$(CONFIG_BLK_DEV_COW) += cow_kern.o +obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o obj-y += stdio_console.o $(CHAN_OBJS) @@ -46,7 +48,7 @@ USER_SINGLE_OBJS = $(foreach f,$(patsubs USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) fd.o \ null.o pty.o tty.o xterm.o -USER_OBJS := $(foreach file,$(USER_OBJS),arch/um/drivers/$(file)) +USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) $(USER_OBJS) : %.o: %.c $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< --- linux-2.6.3-rc2/arch/um/drivers/mcast_user.c 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/um/drivers/mcast_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -23,6 +23,7 @@ #include "kern_util.h" #include "user_util.h" #include "user.h" +#include "os.h" #define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) @@ -62,7 +63,8 @@ static int mcast_open(void *data) goto out; } - if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0){ + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0){ printk("mcast_open : data socket failed, errno = %d\n", errno); fd = -ENOMEM; @@ -72,7 +74,7 @@ static int mcast_open(void *data) if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { printk("mcast_open: SO_REUSEADDR failed, errno = %d\n", errno); - close(fd); + os_close_file(fd); fd = -EINVAL; goto out; } @@ -82,7 +84,7 @@ static int mcast_open(void *data) sizeof(pri->ttl)) < 0) { printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n", errno); - close(fd); + os_close_file(fd); fd = -EINVAL; goto out; } @@ -91,7 +93,7 @@ static int mcast_open(void *data) if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) { printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n", errno); - close(fd); + os_close_file(fd); fd = -EINVAL; goto out; } @@ -99,7 +101,7 @@ static int mcast_open(void *data) /* bind socket to mcast address */ if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) { printk("mcast_open : data bind failed, errno = %d\n", errno); - close(fd); + os_close_file(fd); fd = -EINVAL; goto out; } @@ -115,7 +117,7 @@ static int mcast_open(void *data) "interface on the host.\n"); printk("eth0 should be configured in order to use the " "multicast transport.\n"); - close(fd); + os_close_file(fd); fd = -EINVAL; } @@ -137,7 +139,7 @@ static void mcast_close(int fd, void *da errno); } - close(fd); + os_close_file(fd); } int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri) --- linux-2.6.3-rc2/arch/um/drivers/mconsole_kern.c 2003-06-14 12:17:56.000000000 -0700 +++ 25/arch/um/drivers/mconsole_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,6 +1,6 @@ /* * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -15,6 +15,9 @@ #include "linux/sysrq.h" #include "linux/workqueue.h" #include "linux/module.h" +#include "linux/file.h" +#include "linux/fs.h" +#include "linux/namei.h" #include "linux/proc_fs.h" #include "asm/irq.h" #include "asm/uaccess.h" @@ -27,6 +30,7 @@ #include "init.h" #include "os.h" #include "umid.h" +#include "irq_kern.h" static int do_unlink_socket(struct notifier_block *notifier, unsigned long what, void *data) @@ -67,7 +71,7 @@ void mc_work_proc(void *unused) DECLARE_WORK(mconsole_work, mc_work_proc, NULL); -void mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs) { int fd; struct mconsole_entry *new; @@ -75,9 +79,10 @@ void mconsole_interrupt(int irq, void *d fd = (int) dev_id; while (mconsole_get_request(fd, &req)){ - if(req.cmd->as_interrupt) (*req.cmd->handler)(&req); + if(req.cmd->context == MCONSOLE_INTR) + (*req.cmd->handler)(&req); else { - new = kmalloc(sizeof(req), GFP_ATOMIC); + new = kmalloc(sizeof(*new), GFP_ATOMIC); if(new == NULL) mconsole_reply(&req, "Out of memory", 1, 0); else { @@ -88,6 +93,7 @@ void mconsole_interrupt(int irq, void *d } if(!list_empty(&mc_requests)) schedule_work(&mconsole_work); reactivate_fd(fd, MCONSOLE_IRQ); + return(IRQ_HANDLED); } void mconsole_version(struct mc_request *req) @@ -100,20 +106,110 @@ void mconsole_version(struct mc_request mconsole_reply(req, version, 0, 0); } +void mconsole_log(struct mc_request *req) +{ + int len; + char *ptr = req->request.data; + + ptr += strlen("log"); + while(isspace(*ptr)) ptr++; + + len = req->len - (ptr - req->request.data); + printk("%.*s", len, ptr); + mconsole_reply(req, "", 0, 0); +} + +void mconsole_proc(struct mc_request *req) +{ + struct nameidata nd; + struct file_system_type *proc; + struct super_block *super; + struct file *file; + int n, err; + char *ptr = req->request.data, *buf; + + ptr += strlen("proc"); + while(isspace(*ptr)) ptr++; + + proc = get_fs_type("proc"); + if(proc == NULL){ + mconsole_reply(req, "procfs not registered", 1, 0); + goto out; + } + + super = (*proc->get_sb)(proc, 0, NULL, NULL); + put_filesystem(proc); + if(super == NULL){ + mconsole_reply(req, "Failed to get procfs superblock", 1, 0); + goto out; + } + up_write(&super->s_umount); + + nd.dentry = super->s_root; + nd.mnt = NULL; + nd.flags = O_RDONLY + 1; + nd.last_type = LAST_ROOT; + + err = link_path_walk(ptr, &nd); + if(err){ + mconsole_reply(req, "Failed to look up file", 1, 0); + goto out_kill; + } + + file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + if(IS_ERR(file)){ + mconsole_reply(req, "Failed to open file", 1, 0); + goto out_kill; + } + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if(buf == NULL){ + mconsole_reply(req, "Failed to allocate buffer", 1, 0); + goto out_fput; + } + + if((file->f_op != NULL) && (file->f_op->read != NULL)){ + do { + n = (*file->f_op->read)(file, buf, PAGE_SIZE - 1, + &file->f_pos); + if(n >= 0){ + buf[n] = '\0'; + mconsole_reply(req, buf, 0, (n > 0)); + } + else { + mconsole_reply(req, "Read of file failed", + 1, 0); + goto out_free; + } + } while(n > 0); + } + else mconsole_reply(req, "", 0, 0); + + out_free: + kfree(buf); + out_fput: + fput(file); + out_kill: + deactivate_super(super); + out: ; +} + #define UML_MCONSOLE_HELPTEXT \ -"Commands: - version - Get kernel version - help - Print this message - halt - Halt UML - reboot - Reboot UML - config = - Add a new device to UML; - same syntax as command line - config - Query the configuration of a device - remove - Remove a device from UML - sysrq - Performs the SysRq action controlled by the letter - cad - invoke the Ctl-Alt-Del handler - stop - pause the UML; it will do nothing until it receives a 'go' - go - continue the UML after a 'stop' +"Commands: \n\ + version - Get kernel version \n\ + help - Print this message \n\ + halt - Halt UML \n\ + reboot - Reboot UML \n\ + config = - Add a new device to UML; \n\ + same syntax as command line \n\ + config - Query the configuration of a device \n\ + remove - Remove a device from UML \n\ + sysrq - Performs the SysRq action controlled by the letter \n\ + cad - invoke the Ctl-Alt-Del handler \n\ + stop - pause the UML; it will do nothing until it receives a 'go' \n\ + go - continue the UML after a 'stop' \n\ + log - make UML enter into the kernel log\n\ + proc - returns the contents of the UML's /proc/\n\ " void mconsole_help(struct mc_request *req) @@ -302,7 +398,7 @@ int mconsole_init(void) if(umid_file_name("mconsole", file, sizeof(file))) return(-1); snprintf(mconsole_socket_name, sizeof(file), "%s", file); - sock = create_unix_socket(file, sizeof(file)); + sock = os_create_unix_socket(file, sizeof(file), 1); if (sock < 0){ printk("Failed to initialize management console\n"); return(1); @@ -344,11 +440,16 @@ static int write_proc_mconsole(struct fi if(buf == NULL) return(-ENOMEM); - if(copy_from_user(buf, buffer, count)) - return(-EFAULT); + if(copy_from_user(buf, buffer, count)){ + count = -EFAULT; + goto out; + } + buf[count] = '\0'; mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count); + out: + kfree(buf); return(count); } --- linux-2.6.3-rc2/arch/um/drivers/mconsole_user.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/drivers/mconsole_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,6 +1,6 @@ /* * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -18,16 +18,18 @@ #include "umid.h" static struct mconsole_command commands[] = { - { "version", mconsole_version, 1 }, - { "halt", mconsole_halt, 0 }, - { "reboot", mconsole_reboot, 0 }, - { "config", mconsole_config, 0 }, - { "remove", mconsole_remove, 0 }, - { "sysrq", mconsole_sysrq, 1 }, - { "help", mconsole_help, 1 }, - { "cad", mconsole_cad, 1 }, - { "stop", mconsole_stop, 0 }, - { "go", mconsole_go, 1 }, + { "version", mconsole_version, MCONSOLE_INTR }, + { "halt", mconsole_halt, MCONSOLE_PROC }, + { "reboot", mconsole_reboot, MCONSOLE_PROC }, + { "config", mconsole_config, MCONSOLE_PROC }, + { "remove", mconsole_remove, MCONSOLE_PROC }, + { "sysrq", mconsole_sysrq, MCONSOLE_INTR }, + { "help", mconsole_help, MCONSOLE_INTR }, + { "cad", mconsole_cad, MCONSOLE_INTR }, + { "stop", mconsole_stop, MCONSOLE_PROC }, + { "go", mconsole_go, MCONSOLE_INTR }, + { "log", mconsole_log, MCONSOLE_INTR }, + { "proc", mconsole_proc, MCONSOLE_PROC }, }; /* Initialized in mconsole_init, which is an initcall */ @@ -139,6 +141,7 @@ int mconsole_reply(struct mc_request *re memcpy(reply.data, str, len); reply.data[len] = '\0'; total -= len; + str += len; reply.len = len + 1; len = sizeof(reply) + reply.len - sizeof(reply.data); --- linux-2.6.3-rc2/arch/um/drivers/mmapper_kern.c 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/um/drivers/mmapper_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -120,7 +120,10 @@ static int __init mmapper_init(void) printk(KERN_INFO "Mapper v0.1\n"); v_buf = (char *) find_iomem("mmapper", &mmapper_size); - if(mmapper_size == 0) return(0); + if(mmapper_size == 0){ + printk(KERN_ERR "mmapper_init - find_iomem failed\n"); + return(0); + } p_buf = __pa(v_buf); --- linux-2.6.3-rc2/arch/um/drivers/net_kern.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/um/drivers/net_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -26,6 +26,7 @@ #include "mconsole_kern.h" #include "init.h" #include "irq_user.h" +#include "irq_kern.h" static spinlock_t opened_lock = SPIN_LOCK_UNLOCKED; LIST_HEAD(opened); @@ -37,7 +38,8 @@ static int uml_net_rx(struct net_device struct sk_buff *skb; /* If we can't allocate memory, try again next round. */ - if ((skb = dev_alloc_skb(dev->mtu)) == NULL) { + skb = dev_alloc_skb(dev->mtu); + if (skb == NULL) { lp->stats.rx_dropped++; return 0; } @@ -61,14 +63,14 @@ static int uml_net_rx(struct net_device return pkt_len; } -void uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) { struct net_device *dev = dev_id; struct uml_net_private *lp = dev->priv; int err; if(!netif_running(dev)) - return; + return(IRQ_NONE); spin_lock(&lp->lock); while((err = uml_net_rx(dev)) > 0) ; @@ -83,6 +85,7 @@ void uml_net_interrupt(int irq, void *de out: spin_unlock(&lp->lock); + return(IRQ_HANDLED); } static int uml_net_open(struct net_device *dev) @@ -252,37 +255,6 @@ void uml_net_user_timer_expire(unsigned #endif } -/* - * default do nothing hard header packet routines for struct net_device init. - * real ethernet transports will overwrite with real routines. - */ -static int uml_net_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, void *saddr, unsigned len) -{ - return(0); /* no change */ -} - -static int uml_net_rebuild_header(struct sk_buff *skb) -{ - return(0); /* ignore */ -} - -static int uml_net_header_cache(struct neighbour *neigh, struct hh_cache *hh) -{ - return(-1); /* fail */ -} - -static void uml_net_header_cache_update(struct hh_cache *hh, - struct net_device *dev, unsigned char * haddr) -{ - /* ignore */ -} - -static int uml_net_header_parse(struct sk_buff *skb, unsigned char *haddr) -{ - return(0); /* nothing */ -} - static spinlock_t devices_lock = SPIN_LOCK_UNLOCKED; static struct list_head devices = LIST_HEAD_INIT(devices); @@ -292,7 +264,7 @@ static int eth_configure(int n, void *in struct uml_net *device; struct net_device *dev; struct uml_net_private *lp; - int err, size; + int save, err, size; size = transport->private_size + sizeof(struct uml_net_private) + sizeof(((struct uml_net_private *) 0)->user); @@ -334,12 +306,6 @@ static int eth_configure(int n, void *in snprintf(dev->name, sizeof(dev->name), "eth%d", n); device->dev = dev; - dev->hard_header = uml_net_hard_header; - dev->rebuild_header = uml_net_rebuild_header; - dev->hard_header_cache = uml_net_header_cache; - dev->header_cache_update= uml_net_header_cache_update; - dev->hard_header_parse = uml_net_header_parse; - (*transport->kern->init)(dev, init); dev->mtu = transport->user->max_packet; @@ -358,25 +324,37 @@ static int eth_configure(int n, void *in rtnl_lock(); err = register_netdevice(dev); rtnl_unlock(); - if (err) + if (err) { + device->dev = NULL; + /* XXX: should we call ->remove() here? */ + free_netdev(dev); return 1; + } lp = dev->priv; - INIT_LIST_HEAD(&lp->list); - spin_lock_init(&lp->lock); - lp->dev = dev; - lp->fd = -1; - lp->mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0 }; - lp->have_mac = device->have_mac; - lp->protocol = transport->kern->protocol; - lp->open = transport->user->open; - lp->close = transport->user->close; - lp->remove = transport->user->remove; - lp->read = transport->kern->read; - lp->write = transport->kern->write; - lp->add_address = transport->user->add_address; - lp->delete_address = transport->user->delete_address; - lp->set_mtu = transport->user->set_mtu; + /* lp.user is the first four bytes of the transport data, which + * has already been initialized. This structure assignment will + * overwrite that, so we make sure that .user gets overwritten with + * what it already has. + */ + save = lp->user[0]; + *lp = ((struct uml_net_private) + { .list = LIST_HEAD_INIT(lp->list), + .lock = SPIN_LOCK_UNLOCKED, + .dev = dev, + .fd = -1, + .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0}, + .have_mac = device->have_mac, + .protocol = transport->kern->protocol, + .open = transport->user->open, + .close = transport->user->close, + .remove = transport->user->remove, + .read = transport->kern->read, + .write = transport->kern->write, + .add_address = transport->user->add_address, + .delete_address = transport->user->delete_address, + .set_mtu = transport->user->set_mtu, + .user = { save } }); init_timer(&lp->tl); lp->tl.function = uml_net_user_timer_expire; @@ -609,7 +587,8 @@ static int net_remove(char *str) unregister_netdev(dev); list_del(&device->list); - free_netdev(device); + kfree(device); + free_netdev(dev); return(0); } --- linux-2.6.3-rc2/arch/um/drivers/net_user.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/drivers/net_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -26,8 +26,7 @@ int tap_open_common(void *dev, char *gat if(gate_addr == NULL) return(0); if(sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4){ - printk("Invalid tap IP address - '%s'\n", - gate_addr); + printk("Invalid tap IP address - '%s'\n", gate_addr); return(-EINVAL); } return(0); @@ -60,18 +59,18 @@ void read_output(int fd, char *output, i } *output = '\0'; - if(read(fd, &remain, sizeof(remain)) != sizeof(remain)){ - printk("read_output - read of length failed, errno = %d\n", - errno); + n = os_read_file(fd, &remain, sizeof(remain)); + if(n != sizeof(remain)){ + printk("read_output - read of length failed, err = %d\n", -n); return; } while(remain != 0){ n = (remain < len) ? remain : len; - actual = read(fd, output, n); + actual = os_read_file(fd, output, n); if(actual != n){ printk("read_output - read of data failed, " - "errno = %d\n", errno); + "err = %d\n", -actual); return; } remain -= actual; @@ -83,13 +82,12 @@ int net_read(int fd, void *buf, int len) { int n; - while(((n = read(fd, buf, len)) < 0) && (errno == EINTR)) ; + n = os_read_file(fd, buf, len); - if(n < 0){ - if(errno == EAGAIN) return(0); - return(-errno); - } - else if(n == 0) return(-ENOTCONN); + if(n == -EAGAIN) + return(0); + else if(n == 0) + return(-ENOTCONN); return(n); } @@ -112,13 +110,13 @@ int net_write(int fd, void *buf, int len { int n; - while(((n = write(fd, buf, len)) < 0) && (errno == EINTR)) ; - if(n < 0){ - if(errno == EAGAIN) return(0); - return(-errno); - } - else if(n == 0) return(-ENOTCONN); - return(n); + n = os_write_file(fd, buf, len); + + if(n == -EAGAIN) + return(0); + else if(n == 0) + return(-ENOTCONN); + return(n); } int net_send(int fd, void *buf, int len) @@ -157,7 +155,7 @@ static void change_pre_exec(void *arg) { struct change_pre_exec_data *data = arg; - close(data->close_me); + os_close_file(data->close_me); dup2(data->stdout, 1); } @@ -167,15 +165,15 @@ static int change_tramp(char **argv, cha struct change_pre_exec_data pe_data; err = os_pipe(fds, 1, 0); - if(err){ - printk("change_tramp - pipe failed, errno = %d\n", -err); + if(err < 0){ + printk("change_tramp - pipe failed, err = %d\n", -err); return(err); } pe_data.close_me = fds[0]; pe_data.stdout = fds[1]; pid = run_helper(change_pre_exec, &pe_data, argv, NULL); - close(fds[1]); + os_close_file(fds[1]); read_output(fds[0], output, output_len); waitpid(pid, NULL, 0); return(pid); --- linux-2.6.3-rc2/arch/um/drivers/null.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/drivers/null.c 2004-02-12 00:40:26.000000000 -0800 @@ -5,7 +5,6 @@ #include #include -#include #include "chan_user.h" #include "os.h" --- linux-2.6.3-rc2/arch/um/drivers/port_kern.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/drivers/port_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -6,6 +6,7 @@ #include "linux/list.h" #include "linux/sched.h" #include "linux/slab.h" +#include "linux/interrupt.h" #include "linux/irq.h" #include "linux/spinlock.h" #include "linux/errno.h" @@ -14,6 +15,7 @@ #include "kern_util.h" #include "kern.h" #include "irq_user.h" +#include "irq_kern.h" #include "port.h" #include "init.h" #include "os.h" @@ -38,21 +40,21 @@ struct port_dev { struct connection { struct list_head list; int fd; - int helper_pid; + int helper_pid; int socket[2]; int telnetd_pid; struct port_list *port; }; -static void pipe_interrupt(int irq, void *data, struct pt_regs *regs) +static irqreturn_t pipe_interrupt(int irq, void *data, struct pt_regs *regs) { struct connection *conn = data; int fd; - fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); + fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); if(fd < 0){ if(fd == -EAGAIN) - return; + return(IRQ_NONE); printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", -fd); @@ -65,6 +67,7 @@ static void pipe_interrupt(int irq, void list_add(&conn->list, &conn->port->connections); up(&conn->port->sem); + return(IRQ_HANDLED); } static int port_accept(struct port_list *port) @@ -102,8 +105,7 @@ static int port_accept(struct port_list } list_add(&conn->list, &port->pending); - ret = 1; - goto out; + return(1); out_free: kfree(conn); @@ -138,12 +140,13 @@ void port_work_proc(void *unused) DECLARE_WORK(port_work, port_work_proc, NULL); -static void port_interrupt(int irq, void *data, struct pt_regs *regs) +static irqreturn_t port_interrupt(int irq, void *data, struct pt_regs *regs) { struct port_list *port = data; port->has_connection = 1; schedule_work(&port_work); + return(IRQ_HANDLED); } void *port_data(int port_num) --- linux-2.6.3-rc2/arch/um/drivers/port_user.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/um/drivers/port_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -47,10 +47,12 @@ void *port_init(char *str, int device, s return(NULL); } - if((kern_data = port_data(port)) == NULL) + kern_data = port_data(port); + if(kern_data == NULL) return(NULL); - if((data = um_kmalloc(sizeof(*data))) == NULL) + data = um_kmalloc(sizeof(*data)); + if(data == NULL) goto err; *data = ((struct port_chan) { .raw = opts->raw, @@ -90,7 +92,7 @@ void port_close(int fd, void *d) struct port_chan *data = d; port_remove_dev(data->kernel_data); - close(fd); + os_close_file(fd); } int port_console_write(int fd, const char *buf, int n, void *d) @@ -130,11 +132,15 @@ int port_listen_fd(int port) goto out; } - if((listen(fd, 1) < 0) || (os_set_fd_block(fd, 0))){ + if(listen(fd, 1) < 0){ err = -errno; goto out; } + err = os_set_fd_block(fd, 0); + if(err < 0) + goto out; + return(fd); out: os_close_file(fd); @@ -153,10 +159,10 @@ void port_pre_exec(void *arg) dup2(data->sock_fd, 0); dup2(data->sock_fd, 1); dup2(data->sock_fd, 2); - close(data->sock_fd); + os_close_file(data->sock_fd); dup2(data->pipe_fd, 3); os_shutdown_socket(3, 1, 0); - close(data->pipe_fd); + os_close_file(data->pipe_fd); } int port_connection(int fd, int *socket, int *pid_out) @@ -166,11 +172,12 @@ int port_connection(int fd, int *socket, "/usr/lib/uml/port-helper", NULL }; struct port_pre_exec_data data; - if((new = os_accept_connection(fd)) < 0) - return(-errno); + new = os_accept_connection(fd); + if(new < 0) + return(new); err = os_pipe(socket, 0, 0); - if(err) + if(err < 0) goto out_close; data = ((struct port_pre_exec_data) @@ -186,11 +193,11 @@ int port_connection(int fd, int *socket, out_shutdown: os_shutdown_socket(socket[0], 1, 1); - close(socket[0]); + os_close_file(socket[0]); os_shutdown_socket(socket[1], 1, 1); - close(socket[1]); + os_close_file(socket[1]); out_close: - close(new); + os_close_file(new); return(err); } --- linux-2.6.3-rc2/arch/um/drivers/pty.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/um/drivers/pty.c 2004-02-12 00:40:26.000000000 -0800 @@ -7,12 +7,12 @@ #include #include #include -#include #include #include "chan_user.h" #include "user.h" #include "user_util.h" #include "kern_util.h" +#include "os.h" struct pty_chan { void (*announce)(char *dev_name, int dev); @@ -26,7 +26,8 @@ void *pty_chan_init(char *str, int devic { struct pty_chan *data; - if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); + data = um_kmalloc(sizeof(*data)); + if(data == NULL) return(NULL); *data = ((struct pty_chan) { .announce = opts->announce, .dev = device, .raw = opts->raw }); @@ -39,7 +40,8 @@ int pts_open(int input, int output, int char *dev; int fd; - if((fd = get_pty()) < 0){ + fd = get_pty(); + if(fd < 0){ printk("open_pts : Failed to open pts\n"); return(-errno); } @@ -57,29 +59,27 @@ int pts_open(int input, int output, int int getmaster(char *line) { - struct stat stb; char *pty, *bank, *cp; - int master; + int master, err; pty = &line[strlen("/dev/ptyp")]; for (bank = "pqrs"; *bank; bank++) { line[strlen("/dev/pty")] = *bank; *pty = '0'; - if (stat(line, &stb) < 0) + if (os_stat_file(line, NULL) < 0) break; for (cp = "0123456789abcdef"; *cp; cp++) { *pty = *cp; - master = open(line, O_RDWR); + master = os_open_file(line, of_rdwr(OPENFLAGS()), 0); if (master >= 0) { char *tp = &line[strlen("/dev/")]; - int ok; /* verify slave side is usable */ *tp = 't'; - ok = access(line, R_OK|W_OK) == 0; + err = os_access(line, OS_ACC_RW_OK); *tp = 'p'; - if (ok) return(master); - (void) close(master); + if(err == 0) return(master); + (void) os_close_file(master); } } } --- linux-2.6.3-rc2/arch/um/drivers/slip_user.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/um/drivers/slip_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -4,11 +4,9 @@ #include #include #include -#include #include #include #include -#include #include #include "user_util.h" #include "kern_util.h" @@ -65,9 +63,9 @@ static void slip_pre_exec(void *arg) { struct slip_pre_exec_data *data = arg; - if(data->stdin != -1) dup2(data->stdin, 0); + if(data->stdin >= 0) dup2(data->stdin, 0); dup2(data->stdout, 1); - if(data->close_me != -1) close(data->close_me); + if(data->close_me >= 0) os_close_file(data->close_me); } static int slip_tramp(char **argv, int fd) @@ -77,8 +75,8 @@ static int slip_tramp(char **argv, int f int status, pid, fds[2], err, output_len; err = os_pipe(fds, 1, 0); - if(err){ - printk("slip_tramp : pipe failed, errno = %d\n", -err); + if(err < 0){ + printk("slip_tramp : pipe failed, err = %d\n", -err); return(err); } @@ -96,7 +94,7 @@ static int slip_tramp(char **argv, int f printk("slip_tramp : failed to allocate output " "buffer\n"); - close(fds[1]); + os_close_file(fds[1]); read_output(fds[0], output, output_len); if(output != NULL){ printk("%s", output); @@ -105,7 +103,7 @@ static int slip_tramp(char **argv, int f if(waitpid(pid, &status, 0) < 0) err = errno; else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){ printk("'%s' didn't exit with status 0\n", argv[0]); - err = EINVAL; + err = -EINVAL; } } return(err); @@ -118,15 +116,17 @@ static int slip_open(void *data) char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, NULL }; - int sfd, mfd, disc, sencap, err; + int sfd, mfd, err; - if((mfd = get_pty()) < 0){ - printk("umn : Failed to open pty\n"); - return(-1); + mfd = get_pty(); + if(mfd < 0){ + printk("umn : Failed to open pty, err = %d\n", -mfd); + return(mfd); } - if((sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0)) < 0){ - printk("Couldn't open tty for slip line\n"); - return(-1); + sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0); + if(sfd < 0){ + printk("Couldn't open tty for slip line, err = %d\n", -sfd); + return(sfd); } if(set_up_tty(sfd)) return(-1); pri->slave = sfd; @@ -138,28 +138,23 @@ static int slip_open(void *data) err = slip_tramp(argv, sfd); - if(err != 0){ - printk("slip_tramp failed - errno = %d\n", err); - return(-err); + if(err < 0){ + printk("slip_tramp failed - err = %d\n", -err); + return(err); } - if(ioctl(pri->slave, SIOCGIFNAME, pri->name) < 0){ - printk("SIOCGIFNAME failed, errno = %d\n", errno); - return(-errno); + err = os_get_ifname(pri->slave, pri->name); + if(err < 0){ + printk("get_ifname failed, err = %d\n", -err); + return(err); } iter_addresses(pri->dev, open_addr, pri->name); } else { - disc = N_SLIP; - if(ioctl(sfd, TIOCSETD, &disc) < 0){ - printk("Failed to set slip line discipline - " - "errno = %d\n", errno); - return(-errno); - } - sencap = 0; - if(ioctl(sfd, SIOCSIFENCAP, &sencap) < 0){ - printk("Failed to set slip encapsulation - " - "errno = %d\n", errno); - return(-errno); + err = os_set_slip(sfd); + if(err < 0){ + printk("Failed to set slip discipline encapsulation - " + "err = %d\n", -err); + return(err); } } return(mfd); @@ -181,9 +176,9 @@ static void slip_close(int fd, void *dat err = slip_tramp(argv, -1); if(err != 0) - printk("slip_tramp failed - errno = %d\n", err); - close(fd); - close(pri->slave); + printk("slip_tramp failed - errno = %d\n", -err); + os_close_file(fd); + os_close_file(pri->slave); pri->slave = -1; } @@ -243,7 +238,7 @@ static void slip_add_addr(unsigned char { struct slip_data *pri = data; - if(pri->slave == -1) return; + if(pri->slave < 0) return; open_addr(addr, netmask, pri->name); } @@ -252,7 +247,7 @@ static void slip_del_addr(unsigned char { struct slip_data *pri = data; - if(pri->slave == -1) return; + if(pri->slave < 0) return; close_addr(addr, netmask, pri->name); } --- linux-2.6.3-rc2/arch/um/drivers/slirp_user.c 2003-06-14 12:18:25.000000000 -0700 +++ 25/arch/um/drivers/slirp_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -48,15 +47,15 @@ static int slirp_tramp(char **argv, int return(pid); } - + +/* XXX This is just a trivial wrapper around os_pipe */ static int slirp_datachan(int *mfd, int *sfd) { int fds[2], err; err = os_pipe(fds, 1, 1); - if(err){ - printk("slirp_datachan: Failed to open pipe, errno = %d\n", - -err); + if(err < 0){ + printk("slirp_datachan: Failed to open pipe, err = %d\n", -err); return(err); } @@ -77,7 +76,7 @@ static int slirp_open(void *data) pid = slirp_tramp(pri->argw.argv, sfd); if(pid < 0){ - printk("slirp_tramp failed - errno = %d\n", pid); + printk("slirp_tramp failed - errno = %d\n", -pid); os_close_file(sfd); os_close_file(mfd); return(pid); @@ -97,8 +96,8 @@ static void slirp_close(int fd, void *da struct slirp_data *pri = data; int status,err; - close(fd); - close(pri->slave); + os_close_file(fd); + os_close_file(pri->slave); pri->slave = -1; --- linux-2.6.3-rc2/arch/um/drivers/ssl.c 2003-06-14 12:18:21.000000000 -0700 +++ 25/arch/um/drivers/ssl.c 2004-02-12 00:40:26.000000000 -0800 @@ -10,6 +10,7 @@ #include "linux/major.h" #include "linux/mm.h" #include "linux/init.h" +#include "linux/console.h" #include "asm/termbits.h" #include "asm/irq.h" #include "line.h" @@ -53,8 +54,9 @@ static int ssl_remove(char *str); static struct line_driver driver = { .name = "UML serial line", - .devfs_name = "tts/%d", - .major = TTYAUX_MAJOR, + .device_name = "ttS", + .devfs_name = "tts/", + .major = TTY_MAJOR, .minor_start = 64, .type = TTY_DRIVER_TYPE_SERIAL, .subtype = 0, @@ -149,6 +151,9 @@ static int ssl_ioctl(struct tty_struct * case TCSETSW: case TCGETA: case TIOCMGET: + case TCSBRK: + case TCSBRKP: + case TIOCMSET: ret = -ENOIOCTLCMD; break; default: @@ -212,6 +217,39 @@ static struct tty_operations ssl_ops = { */ static int ssl_init_done = 0; +extern int tty_init(void); + +static void ssl_console_write(struct console *c, const char *string, + unsigned len) +{ + struct line *line = &serial_lines[c->index]; + if(ssl_init_done) + down(&line->sem); + console_write_chan(&line->chan_list, string, len); + if(ssl_init_done) + up(&line->sem); +} + +static struct tty_driver *ssl_console_device(struct console *c, int *index) +{ + *index = c->index; + return ssl_driver; +} + +static int ssl_console_setup(struct console *co, char *options) +{ + return(0); +} + +static struct console ssl_cons = { + name: "ttyS", + write: ssl_console_write, + device: ssl_console_device, + setup: ssl_console_setup, + flags: CON_PRINTBUFFER, + index: -1, +}; + int ssl_init(void) { char *new_title; @@ -219,6 +257,8 @@ int ssl_init(void) printk(KERN_INFO "Initializing software serial port version %d\n", ssl_version); + tty_init(); + ssl_driver = line_register_devfs(&lines, &driver, &ssl_ops, serial_lines, sizeof(serial_lines)/sizeof(serial_lines[0])); @@ -227,6 +267,7 @@ int ssl_init(void) new_title = add_xterm_umid(opts.xterm_title); if(new_title != NULL) opts.xterm_title = new_title; + register_console(&ssl_cons); ssl_init_done = 1; return(0); } @@ -235,9 +276,9 @@ __initcall(ssl_init); static int ssl_chan_setup(char *str) { - line_setup(serial_lines, sizeof(serial_lines)/sizeof(serial_lines[0]), - str, 1); - return(1); + return(line_setup(serial_lines, + sizeof(serial_lines)/sizeof(serial_lines[0]), + str, 1)); } __setup("ssl", ssl_chan_setup); --- linux-2.6.3-rc2/arch/um/drivers/stdio_console.c 2003-06-14 12:18:04.000000000 -0700 +++ 25/arch/um/drivers/stdio_console.c 2004-02-12 00:40:26.000000000 -0800 @@ -83,7 +83,8 @@ static int con_remove(char *str); static struct line_driver driver = { .name = "UML console", - .devfs_name = "vc/%d", + .device_name = "tty", + .devfs_name = "vc/", .major = TTY_MAJOR, .minor_start = 0, .type = TTY_DRIVER_TYPE_CONSOLE, @@ -159,14 +160,28 @@ static int chars_in_buffer(struct tty_st static int con_init_done = 0; +static struct tty_operations console_ops = { + .open = con_open, + .close = con_close, + .write = con_write, + .chars_in_buffer = chars_in_buffer, + .set_termios = set_termios, + .write_room = line_write_room, +}; + +extern int tty_init(void); + int stdio_init(void) { char *new_title; printk(KERN_INFO "Initializing stdio console driver\n"); + tty_init(); + console_driver = line_register_devfs(&console_lines, &driver, - &console_ops, vts, sizeof(vts)/sizeof(vts[0])); + &console_ops, vts, + sizeof(vts)/sizeof(vts[0])); lines_init(vts, sizeof(vts)/sizeof(vts[0])); @@ -183,19 +198,14 @@ __initcall(stdio_init); static void console_write(struct console *console, const char *string, unsigned len) { - if(con_init_done) down(&vts[console->index].sem); - console_write_chan(&vts[console->index].chan_list, string, len); - if(con_init_done) up(&vts[console->index].sem); -} + struct line *line = &vts[console->index]; -static struct tty_operations console_ops = { - .open = con_open, - .close = con_close, - .write = con_write, - .chars_in_buffer = chars_in_buffer, - .set_termios = set_termios, - .write_room = line_write_room, -}; + if(con_init_done) + down(&line->sem); + console_write_chan(&line->chan_list, string, len); + if(con_init_done) + up(&line->sem); +} static struct tty_driver *console_device(struct console *c, int *index) { @@ -208,22 +218,28 @@ static int console_setup(struct console return(0); } -static struct console stdiocons = INIT_CONSOLE("tty", console_write, - console_device, console_setup, - CON_PRINTBUFFER); +static struct console stdiocons = { + name: "tty", + write: console_write, + device: console_device, + setup: console_setup, + flags: CON_PRINTBUFFER, + index: -1, +}; -static void __init stdio_console_init(void) +static int __init stdio_console_init(void) { INIT_LIST_HEAD(&vts[0].chan_list); list_add(&init_console_chan.list, &vts[0].chan_list); register_console(&stdiocons); + return(0); } + console_initcall(stdio_console_init); static int console_chan_setup(char *str) { - line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1); - return(1); + return(line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1)); } __setup("con", console_chan_setup); --- linux-2.6.3-rc2/arch/um/drivers/tty.c 2003-06-14 12:18:07.000000000 -0700 +++ 25/arch/um/drivers/tty.c 2004-02-12 00:40:26.000000000 -0800 @@ -5,7 +5,6 @@ #include #include -#include #include #include #include "chan_user.h" @@ -30,7 +29,8 @@ void *tty_chan_init(char *str, int devic } str++; - if((data = um_kmalloc(sizeof(*data))) == NULL) + data = um_kmalloc(sizeof(*data)); + if(data == NULL) return(NULL); *data = ((struct tty_chan) { .dev = str, .raw = opts->raw }); --- linux-2.6.3-rc2/arch/um/drivers/ubd_kern.c 2003-11-09 16:45:05.000000000 -0800 +++ 25/arch/um/drivers/ubd_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -8,6 +8,13 @@ * old style ubd by setting UBD_SHIFT to 0 * 2002-09-27...2002-10-18 massive tinkering for 2.5 * partitions have changed in 2.5 + * 2003-01-29 more tinkering for 2.5.59-1 + * This should now address the sysfs problems and has + * the symlink for devfs to allow for booting with + * the common /dev/ubd/discX/... names rather than + * only /dev/ubdN/discN this version also has lots of + * clean ups preparing for ubd-many. + * James McMechan */ #define MAJOR_NR UBD_MAJOR @@ -40,9 +47,12 @@ #include "mconsole_kern.h" #include "init.h" #include "irq_user.h" +#include "irq_kern.h" #include "ubd_user.h" #include "2_5compat.h" #include "os.h" +#include "mem.h" +#include "mem_kern.h" static spinlock_t ubd_io_lock = SPIN_LOCK_UNLOCKED; static spinlock_t ubd_lock = SPIN_LOCK_UNLOCKED; @@ -56,6 +66,10 @@ static int ubd_ioctl(struct inode * inod #define MAX_DEV (8) +/* Changed in early boot */ +static int ubd_do_mmap = 0; +#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE + static struct block_device_operations ubd_blops = { .owner = THIS_MODULE, .open = ubd_open, @@ -67,7 +81,7 @@ static struct block_device_operations ub static request_queue_t *ubd_queue; /* Protected by ubd_lock */ -static int fake_major = 0; +static int fake_major = MAJOR_NR; static struct gendisk *ubd_gendisk[MAX_DEV]; static struct gendisk *fake_gendisk[MAX_DEV]; @@ -96,13 +110,19 @@ struct cow { struct ubd { char *file; - int is_dir; int count; int fd; __u64 size; struct openflags boot_openflags; struct openflags openflags; + int no_cow; struct cow cow; + + int map_writes; + int map_reads; + int nomap_writes; + int nomap_reads; + int write_maps; }; #define DEFAULT_COW { \ @@ -115,21 +135,28 @@ struct ubd { #define DEFAULT_UBD { \ .file = NULL, \ - .is_dir = 0, \ .count = 0, \ .fd = -1, \ .size = -1, \ .boot_openflags = OPEN_FLAGS, \ .openflags = OPEN_FLAGS, \ + .no_cow = 0, \ .cow = DEFAULT_COW, \ + .map_writes = 0, \ + .map_reads = 0, \ + .nomap_writes = 0, \ + .nomap_reads = 0, \ + .write_maps = 0, \ } struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; static int ubd0_init(void) { - if(ubd_dev[0].file == NULL) - ubd_dev[0].file = "root_fs"; + struct ubd *dev = &ubd_dev[0]; + + if(dev->file == NULL) + dev->file = "root_fs"; return(0); } @@ -196,19 +223,46 @@ __uml_help(fake_ide_setup, " Create ide0 entries that map onto ubd devices.\n\n" ); +static int parse_unit(char **ptr) +{ + char *str = *ptr, *end; + int n = -1; + + if(isdigit(*str)) { + n = simple_strtoul(str, &end, 0); + if(end == str) + return(-1); + *ptr = end; + } + else if (('a' <= *str) && (*str <= 'h')) { + n = *str - 'a'; + str++; + *ptr = str; + } + return(n); +} + static int ubd_setup_common(char *str, int *index_out) { + struct ubd *dev; struct openflags flags = global_openflags; char *backing_file; int n, err; if(index_out) *index_out = -1; - n = *str++; + n = *str; if(n == '='){ - static int fake_major_allowed = 1; char *end; int major; + str++; + if(!strcmp(str, "mmap")){ + CHOOSE_MODE(printk("mmap not supported by the ubd " + "driver in tt mode\n"), + ubd_do_mmap = 1); + return(0); + } + if(!strcmp(str, "sync")){ global_openflags.s = 1; return(0); @@ -220,20 +274,14 @@ static int ubd_setup_common(char *str, i return(1); } - if(!fake_major_allowed){ - printk(KERN_ERR "Can't assign a fake major twice\n"); - return(1); - } - err = 1; spin_lock(&ubd_lock); - if(!fake_major_allowed){ + if(fake_major != MAJOR_NR){ printk(KERN_ERR "Can't assign a fake major twice\n"); goto out1; } fake_major = major; - fake_major_allowed = 0; printk(KERN_INFO "Setting extra ubd major number to %d\n", major); @@ -243,25 +291,23 @@ static int ubd_setup_common(char *str, i return(err); } - if(n < '0'){ - printk(KERN_ERR "ubd_setup : index out of range\n"); } - - if((n >= '0') && (n <= '9')) n -= '0'; - else if((n >= 'a') && (n <= 'z')) n -= 'a'; - else { - printk(KERN_ERR "ubd_setup : device syntax invalid\n"); + n = parse_unit(&str); + if(n < 0){ + printk(KERN_ERR "ubd_setup : couldn't parse unit number " + "'%s'\n", str); return(1); } if(n >= MAX_DEV){ - printk(KERN_ERR "ubd_setup : index out of range " - "(%d devices)\n", MAX_DEV); + printk(KERN_ERR "ubd_setup : index %d out of range " + "(%d devices)\n", n, MAX_DEV); return(1); } err = 1; spin_lock(&ubd_lock); - if(ubd_dev[n].file != NULL){ + dev = &ubd_dev[n]; + if(dev->file != NULL){ printk(KERN_ERR "ubd_setup : device already configured\n"); goto out2; } @@ -276,6 +322,11 @@ static int ubd_setup_common(char *str, i flags.s = 1; str++; } + if (*str == 'd'){ + dev->no_cow = 1; + str++; + } + if(*str++ != '='){ printk(KERN_ERR "ubd_setup : Expected '='\n"); goto out2; @@ -284,14 +335,17 @@ static int ubd_setup_common(char *str, i err = 0; backing_file = strchr(str, ','); if(backing_file){ - *backing_file = '\0'; - backing_file++; + if(dev->no_cow) + printk(KERN_ERR "Can't specify both 'd' and a " + "cow file\n"); + else { + *backing_file = '\0'; + backing_file++; + } } - ubd_dev[n].file = str; - if(ubd_is_dir(ubd_dev[n].file)) - ubd_dev[n].is_dir = 1; - ubd_dev[n].cow.file = backing_file; - ubd_dev[n].boot_openflags = flags; + dev->file = str; + dev->cow.file = backing_file; + dev->boot_openflags = flags; out2: spin_unlock(&ubd_lock); return(err); @@ -321,8 +375,7 @@ __uml_help(ubd_setup, static int fakehd_set = 0; static int fakehd(char *str) { - printk(KERN_INFO - "fakehd : Changing ubd name to \"hd\".\n"); + printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n"); fakehd_set = 1; return 1; } @@ -368,32 +421,42 @@ static void ubd_handler(void) { struct io_thread_req req; struct request *rq = elv_next_request(ubd_queue); - int n; + int n, err; do_ubd = NULL; intr_count++; n = read_ubd_fs(thread_fd, &req, sizeof(req)); if(n != sizeof(req)){ printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " - "errno = %d\n", os_getpid(), -n); + "err = %d\n", os_getpid(), -n); spin_lock(&ubd_io_lock); end_request(rq, 0); spin_unlock(&ubd_io_lock); return; } - if((req.offset != ((__u64) (rq->sector)) << 9) || - (req.length != (rq->current_nr_sectors) << 9)) + if((req.op != UBD_MMAP) && + ((req.offset != ((__u64) (rq->sector)) << 9) || + (req.length != (rq->current_nr_sectors) << 9))) panic("I/O op mismatch"); + if(req.map_fd != -1){ + err = physmem_subst_mapping(req.buffer, req.map_fd, + req.map_offset, 1); + if(err) + printk("ubd_handler - physmem_subst_mapping failed, " + "err = %d\n", -err); + } + ubd_finish(rq, req.error); reactivate_fd(thread_fd, UBD_IRQ); do_ubd_request(ubd_queue); } -static void ubd_intr(int irq, void *dev, struct pt_regs *unused) +static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused) { ubd_handler(); + return(IRQ_HANDLED); } /* Only changed by ubd_init, which is an initcall. */ @@ -429,18 +492,20 @@ static void ubd_close(struct ubd *dev) static int ubd_open_dev(struct ubd *dev) { struct openflags flags; - int err, n, create_cow, *create_ptr; + char **back_ptr; + int err, create_cow, *create_ptr; + dev->openflags = dev->boot_openflags; create_cow = 0; create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL; - dev->fd = open_ubd_file(dev->file, &dev->openflags, &dev->cow.file, + back_ptr = dev->no_cow ? NULL : &dev->cow.file; + dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr, &dev->cow.bitmap_offset, &dev->cow.bitmap_len, &dev->cow.data_offset, create_ptr); if((dev->fd == -ENOENT) && create_cow){ - n = dev - ubd_dev; dev->fd = create_cow_file(dev->file, dev->cow.file, - dev->openflags, 1 << 9, + dev->openflags, 1 << 9, PAGE_SIZE, &dev->cow.bitmap_offset, &dev->cow.bitmap_len, &dev->cow.data_offset); @@ -455,13 +520,17 @@ static int ubd_open_dev(struct ubd *dev) if(dev->cow.file != NULL){ err = -ENOMEM; dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len); - if(dev->cow.bitmap == NULL) goto error; + if(dev->cow.bitmap == NULL){ + printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); + goto error; + } flush_tlb_kernel_vm(); err = read_cow_bitmap(dev->fd, dev->cow.bitmap, dev->cow.bitmap_offset, dev->cow.bitmap_len); - if(err) goto error; + if(err < 0) + goto error; flags = dev->openflags; flags.w = 0; @@ -481,17 +550,31 @@ static int ubd_new_disk(int major, u64 s { struct gendisk *disk; + char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")]; + int err; disk = alloc_disk(1 << UBD_SHIFT); - if (!disk) - return -ENOMEM; + if(disk == NULL) + return(-ENOMEM); disk->major = major; disk->first_minor = unit << UBD_SHIFT; disk->fops = &ubd_blops; set_capacity(disk, size / 512); - sprintf(disk->disk_name, "ubd"); - sprintf(disk->devfs_name, "ubd/disc%d", unit); + if(major == MAJOR_NR){ + sprintf(disk->disk_name, "ubd%c", 'a' + unit); + sprintf(disk->devfs_name, "ubd/disc%d", unit); + sprintf(from, "ubd/%d", unit); + sprintf(to, "disc%d/disc", unit); + err = devfs_mk_symlink(from, to); + if(err) + printk("ubd_new_disk failed to make link from %s to " + "%s, error = %d\n", from, to, err); + } + else { + sprintf(disk->disk_name, "ubd_fake%d", unit); + sprintf(disk->devfs_name, "ubd_fake/disc%d", unit); + } disk->private_data = &ubd_dev[unit]; disk->queue = ubd_queue; @@ -506,24 +589,21 @@ static int ubd_add(int n) struct ubd *dev = &ubd_dev[n]; int err; - if(dev->is_dir) - return(-EISDIR); - - if (!dev->file) + if(dev->file == NULL) return(-ENODEV); if (ubd_open_dev(dev)) return(-ENODEV); err = ubd_file_size(dev, &dev->size); - if(err) + if(err < 0) return(err); err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]); if(err) return(err); - if(fake_major) + if(fake_major != MAJOR_NR) ubd_new_disk(fake_major, dev->size, n, &fake_gendisk[n]); @@ -561,42 +641,42 @@ static int ubd_config(char *str) return(err); } -static int ubd_get_config(char *dev, char *str, int size, char **error_out) +static int ubd_get_config(char *name, char *str, int size, char **error_out) { - struct ubd *ubd; + struct ubd *dev; char *end; - int major, n = 0; + int n, len = 0; - major = simple_strtoul(dev, &end, 0); - if((*end != '\0') || (end == dev)){ - *error_out = "ubd_get_config : didn't parse major number"; + n = simple_strtoul(name, &end, 0); + if((*end != '\0') || (end == name)){ + *error_out = "ubd_get_config : didn't parse device number"; return(-1); } - if((major >= MAX_DEV) || (major < 0)){ - *error_out = "ubd_get_config : major number out of range"; + if((n >= MAX_DEV) || (n < 0)){ + *error_out = "ubd_get_config : device number out of range"; return(-1); } - ubd = &ubd_dev[major]; + dev = &ubd_dev[n]; spin_lock(&ubd_lock); - if(ubd->file == NULL){ - CONFIG_CHUNK(str, size, n, "", 1); + if(dev->file == NULL){ + CONFIG_CHUNK(str, size, len, "", 1); goto out; } - CONFIG_CHUNK(str, size, n, ubd->file, 0); + CONFIG_CHUNK(str, size, len, dev->file, 0); - if(ubd->cow.file != NULL){ - CONFIG_CHUNK(str, size, n, ",", 0); - CONFIG_CHUNK(str, size, n, ubd->cow.file, 1); + if(dev->cow.file != NULL){ + CONFIG_CHUNK(str, size, len, ",", 0); + CONFIG_CHUNK(str, size, len, dev->cow.file, 1); } - else CONFIG_CHUNK(str, size, n, "", 1); + else CONFIG_CHUNK(str, size, len, "", 1); out: spin_unlock(&ubd_lock); - return(n); + return(len); } static int ubd_remove(char *str) @@ -604,11 +684,9 @@ static int ubd_remove(char *str) struct ubd *dev; int n, err = -ENODEV; - if(!isdigit(*str)) - return(err); /* it should be a number 0-7/a-h */ + n = parse_unit(&str); - n = *str - '0'; - if(n >= MAX_DEV) + if((n < 0) || (n >= MAX_DEV)) return(err); dev = &ubd_dev[n]; @@ -669,7 +747,7 @@ int ubd_init(void) elevator_init(ubd_queue, &elevator_noop); - if (fake_major != 0) { + if (fake_major != MAJOR_NR) { char name[sizeof("ubd_nnn\0")]; snprintf(name, sizeof(name), "ubd_%d", fake_major); @@ -696,6 +774,7 @@ int ubd_driver_init(void){ io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), &thread_fd); if(io_pid < 0){ + io_pid = -1; printk(KERN_ERR "ubd : Failed to start I/O thread (errno = %d) - " "falling back to synchronous I/O\n", -io_pid); @@ -703,8 +782,8 @@ int ubd_driver_init(void){ } err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, SA_INTERRUPT, "ubd", ubd_dev); - if(err != 0) printk(KERN_ERR - "um_request_irq failed - errno = %d\n", -err); + if(err != 0) + printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); return(err); } @@ -714,15 +793,9 @@ static int ubd_open(struct inode *inode, { struct gendisk *disk = inode->i_bdev->bd_disk; struct ubd *dev = disk->private_data; - int err = -EISDIR; - - if(dev->is_dir == 1) - goto out; + int err = 0; - err = 0; if(dev->count == 0){ - dev->openflags = dev->boot_openflags; - err = ubd_open_dev(dev); if(err){ printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", @@ -749,62 +822,156 @@ static int ubd_release(struct inode * in return(0); } -void cowify_req(struct io_thread_req *req, struct ubd *dev) +static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, + __u64 *cow_offset, unsigned long *bitmap, + __u64 bitmap_offset, unsigned long *bitmap_words, + __u64 bitmap_len) +{ + __u64 sector = io_offset >> 9; + int i, update_bitmap = 0; + + for(i = 0; i < length >> 9; i++){ + if(cow_mask != NULL) + ubd_set_bit(i, (unsigned char *) cow_mask); + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + continue; + + update_bitmap = 1; + ubd_set_bit(sector + i, (unsigned char *) bitmap); + } + + if(!update_bitmap) + return; + + *cow_offset = sector / (sizeof(unsigned long) * 8); + + /* This takes care of the case where we're exactly at the end of the + * device, and *cow_offset + 1 is off the end. So, just back it up + * by one word. Thanks to Lynn Kerby for the fix and James McMechan + * for the original diagnosis. + */ + if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / + sizeof(unsigned long) - 1)) + (*cow_offset)--; + + bitmap_words[0] = bitmap[*cow_offset]; + bitmap_words[1] = bitmap[*cow_offset + 1]; + + *cow_offset *= sizeof(unsigned long); + *cow_offset += bitmap_offset; +} + +static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, + __u64 bitmap_offset, __u64 bitmap_len) { - int i, update_bitmap, sector = req->offset >> 9; + __u64 sector = req->offset >> 9; + int i; if(req->length > (sizeof(req->sector_mask) * 8) << 9) panic("Operation too long"); + if(req->op == UBD_READ) { for(i = 0; i < req->length >> 9; i++){ - if(ubd_test_bit(sector + i, (unsigned char *) - dev->cow.bitmap)){ + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) ubd_set_bit(i, (unsigned char *) &req->sector_mask); - } } - } - else { - update_bitmap = 0; - for(i = 0; i < req->length >> 9; i++){ - ubd_set_bit(i, (unsigned char *) - &req->sector_mask); - if(!ubd_test_bit(sector + i, (unsigned char *) - dev->cow.bitmap)) - update_bitmap = 1; - ubd_set_bit(sector + i, (unsigned char *) - dev->cow.bitmap); - } - if(update_bitmap){ - req->cow_offset = sector / (sizeof(unsigned long) * 8); - req->bitmap_words[0] = - dev->cow.bitmap[req->cow_offset]; - req->bitmap_words[1] = - dev->cow.bitmap[req->cow_offset + 1]; - req->cow_offset *= sizeof(unsigned long); - req->cow_offset += dev->cow.bitmap_offset; + } + else cowify_bitmap(req->offset, req->length, &req->sector_mask, + &req->cow_offset, bitmap, bitmap_offset, + req->bitmap_words, bitmap_len); +} + +static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset) +{ + __u64 sector; + unsigned char *bitmap; + int bit, i; + + /* mmap must have been requested on the command line */ + if(!ubd_do_mmap) + return(-1); + + /* The buffer must be page aligned */ + if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0) + return(-1); + + /* The request must be a page long */ + if((req->current_nr_sectors << 9) != PAGE_SIZE) + return(-1); + + if(dev->cow.file == NULL) + return(dev->fd); + + sector = offset >> 9; + bitmap = (unsigned char *) dev->cow.bitmap; + bit = ubd_test_bit(sector, bitmap); + + for(i = 1; i < req->current_nr_sectors; i++){ + if(ubd_test_bit(sector + i, bitmap) != bit) + return(-1); + } + + if(bit || (rq_data_dir(req) == WRITE)) + offset += dev->cow.data_offset; + + /* The data on disk must be page aligned */ + if((offset % UBD_MMAP_BLOCK_SIZE) != 0) + return(-1); + + return(bit ? dev->fd : dev->cow.fd); +} + +static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset, + struct request *req, + struct io_thread_req *io_req) +{ + int err; + + if(rq_data_dir(req) == WRITE){ + /* Writes are almost no-ops since the new data is already in the + * host page cache + */ + dev->map_writes++; + if(dev->cow.file != NULL) + cowify_bitmap(io_req->offset, io_req->length, + &io_req->sector_mask, &io_req->cow_offset, + dev->cow.bitmap, dev->cow.bitmap_offset, + io_req->bitmap_words, + dev->cow.bitmap_len); + } + else { + int w; + + if((dev->cow.file != NULL) && (fd == dev->cow.fd)) + w = 0; + else w = dev->openflags.w; + + if((dev->cow.file != NULL) && (fd == dev->fd)) + offset += dev->cow.data_offset; + + err = physmem_subst_mapping(req->buffer, fd, offset, w); + if(err){ + printk("physmem_subst_mapping failed, err = %d\n", + -err); + return(1); } + dev->map_reads++; } + io_req->op = UBD_MMAP; + io_req->buffer = req->buffer; + return(0); } static int prepare_request(struct request *req, struct io_thread_req *io_req) { struct gendisk *disk = req->rq_disk; struct ubd *dev = disk->private_data; - __u64 block; - int nsect; + __u64 offset; + int len, fd; if(req->rq_status == RQ_INACTIVE) return(1); - if(dev->is_dir){ - strcpy(req->buffer, "HOSTFS:"); - strcat(req->buffer, dev->file); - spin_lock(&ubd_io_lock); - end_request(req, 1); - spin_unlock(&ubd_io_lock); - return(1); - } - if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ printk("Write attempted on readonly ubd device %s\n", disk->disk_name); @@ -814,23 +981,49 @@ static int prepare_request(struct reques return(1); } - block = req->sector; - nsect = req->current_nr_sectors; + offset = ((__u64) req->sector) << 9; + len = req->current_nr_sectors << 9; - io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE; io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; io_req->fds[1] = dev->fd; + io_req->map_fd = -1; + io_req->cow_offset = -1; + io_req->offset = offset; + io_req->length = len; + io_req->error = 0; + io_req->sector_mask = 0; + + fd = mmap_fd(req, dev, io_req->offset); + if(fd > 0){ + /* If mmapping is otherwise OK, but the first access to the + * page is a write, then it's not mapped in yet. So we have + * to write the data to disk first, then we can map the disk + * page in and continue normally from there. + */ + if((rq_data_dir(req) == WRITE) && !is_remapped(req->buffer)){ + io_req->map_fd = dev->fd; + io_req->map_offset = io_req->offset + + dev->cow.data_offset; + dev->write_maps++; + } + else return(prepare_mmap_request(dev, fd, io_req->offset, req, + io_req)); + } + + if(rq_data_dir(req) == READ) + dev->nomap_reads++; + else dev->nomap_writes++; + + io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; - io_req->offset = ((__u64) block) << 9; - io_req->length = nsect << 9; io_req->buffer = req->buffer; io_req->sectorsize = 1 << 9; - io_req->sector_mask = 0; - io_req->cow_offset = -1; - io_req->error = 0; - if(dev->cow.file != NULL) cowify_req(io_req, dev); + if(dev->cow.file != NULL) + cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset, + dev->cow.bitmap_len); + return(0); } @@ -885,7 +1078,7 @@ static int ubd_ioctl(struct inode * inod g.heads = 128; g.sectors = 32; g.cylinders = dev->size / (128 * 32 * 512); - g.start = 2; + g.start = get_start_sect(inode->i_bdev); return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0); case HDIO_SET_UNMASKINTR: @@ -935,6 +1128,142 @@ static int ubd_ioctl(struct inode * inod return(-EINVAL); } +static int ubd_check_remapped(int fd, unsigned long address, int is_write, + __u64 offset) +{ + __u64 bitmap_offset; + unsigned long new_bitmap[2]; + int i, err, n; + + /* If it's not a write access, we can't do anything about it */ + if(!is_write) + return(0); + + /* We have a write */ + for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){ + struct ubd *dev = &ubd_dev[i]; + + if((dev->fd != fd) && (dev->cow.fd != fd)) + continue; + + /* It's a write to a ubd device */ + + if(!dev->openflags.w){ + /* It's a write access on a read-only device - probably + * shouldn't happen. If the kernel is trying to change + * something with no intention of writing it back out, + * then this message will clue us in that this needs + * fixing + */ + printk("Write access to mapped page from readonly ubd " + "device %d\n", i); + return(0); + } + + /* It's a write to a writeable ubd device - it must be COWed + * because, otherwise, the page would have been mapped in + * writeable + */ + + if(!dev->cow.file) + panic("Write fault on writeable non-COW ubd device %d", + i); + + /* It should also be an access to the backing file since the + * COW pages should be mapped in read-write + */ + + if(fd == dev->fd) + panic("Write fault on a backing page of ubd " + "device %d\n", i); + + /* So, we do the write, copying the backing data to the COW + * file... + */ + + err = os_seek_file(dev->fd, offset + dev->cow.data_offset); + if(err < 0) + panic("Couldn't seek to %lld in COW file of ubd " + "device %d, err = %d", + offset + dev->cow.data_offset, i, -err); + + n = os_write_file(dev->fd, (void *) address, PAGE_SIZE); + if(n != PAGE_SIZE) + panic("Couldn't copy data to COW file of ubd " + "device %d, err = %d", i, -n); + + /* ... updating the COW bitmap... */ + + cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset, + dev->cow.bitmap, dev->cow.bitmap_offset, + new_bitmap, dev->cow.bitmap_len); + + err = os_seek_file(dev->fd, bitmap_offset); + if(err < 0) + panic("Couldn't seek to %lld in COW file of ubd " + "device %d, err = %d", bitmap_offset, i, -err); + + n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap)); + if(n != sizeof(new_bitmap)) + panic("Couldn't update bitmap of ubd device %d, " + "err = %d", i, -n); + + /* Maybe we can map the COW page in, and maybe we can't. If + * it is a pre-V3 COW file, we can't, since the alignment will + * be wrong. If it is a V3 or later COW file which has been + * moved to a system with a larger page size, then maybe we + * can't, depending on the exact location of the page. + */ + + offset += dev->cow.data_offset; + + /* Remove the remapping, putting the original anonymous page + * back. If the COW file can be mapped in, that is done. + * Otherwise, the COW page is read in. + */ + + if(!physmem_remove_mapping((void *) address)) + panic("Address 0x%lx not remapped by ubd device %d", + address, i); + if((offset % UBD_MMAP_BLOCK_SIZE) == 0) + physmem_subst_mapping((void *) address, dev->fd, + offset, 1); + else { + err = os_seek_file(dev->fd, offset); + if(err < 0) + panic("Couldn't seek to %lld in COW file of " + "ubd device %d, err = %d", offset, i, + -err); + + n = os_read_file(dev->fd, (void *) address, PAGE_SIZE); + if(n != PAGE_SIZE) + panic("Failed to read page from offset %llx of " + "COW file of ubd device %d, err = %d", + offset, i, -n); + } + + return(1); + } + + /* It's not a write on a ubd device */ + return(0); +} + +static struct remapper ubd_remapper = { + .list = LIST_HEAD_INIT(ubd_remapper.list), + .proc = ubd_check_remapped, +}; + +static int ubd_remapper_setup(void) +{ + if(ubd_do_mmap) + register_remapper(&ubd_remapper); + + return(0); +} + +__initcall(ubd_remapper_setup); + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- linux-2.6.3-rc2/arch/um/drivers/ubd_user.c 2003-06-14 12:18:04.000000000 -0700 +++ 25/arch/um/drivers/ubd_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -11,11 +11,8 @@ #include #include #include -#include #include -#include #include -#include #include #include #include "asm/types.h" @@ -24,146 +21,30 @@ #include "user.h" #include "ubd_user.h" #include "os.h" +#include "cow.h" #include #include -#if __BYTE_ORDER == __BIG_ENDIAN -# define ntohll(x) (x) -# define htonll(x) (x) -#elif __BYTE_ORDER == __LITTLE_ENDIAN -# define ntohll(x) bswap_64(x) -# define htonll(x) bswap_64(x) -#else -#error "__BYTE_ORDER not defined" -#endif - -#define PATH_LEN_V1 256 - -struct cow_header_v1 { - int magic; - int version; - char backing_file[PATH_LEN_V1]; - time_t mtime; - __u64 size; - int sectorsize; -}; - -#define PATH_LEN_V2 MAXPATHLEN - -struct cow_header_v2 { - unsigned long magic; - unsigned long version; - char backing_file[PATH_LEN_V2]; - time_t mtime; - __u64 size; - int sectorsize; -}; - -union cow_header { - struct cow_header_v1 v1; - struct cow_header_v2 v2; -}; - -#define COW_MAGIC 0x4f4f4f4d /* MOOO */ -#define COW_VERSION 2 - -static void sizes(__u64 size, int sectorsize, int bitmap_offset, - unsigned long *bitmap_len_out, int *data_offset_out) -{ - *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); - - *data_offset_out = bitmap_offset + *bitmap_len_out; - *data_offset_out = (*data_offset_out + sectorsize - 1) / sectorsize; - *data_offset_out *= sectorsize; -} - -static int read_cow_header(int fd, int *magic_out, char **backing_file_out, - time_t *mtime_out, __u64 *size_out, - int *sectorsize_out, int *bitmap_offset_out) -{ - union cow_header *header; - char *file; - int err, n; - unsigned long version, magic; - - header = um_kmalloc(sizeof(*header)); - if(header == NULL){ - printk("read_cow_header - Failed to allocate header\n"); - return(-ENOMEM); - } - err = -EINVAL; - n = read(fd, header, sizeof(*header)); - if(n < offsetof(typeof(header->v1), backing_file)){ - printk("read_cow_header - short header\n"); - goto out; - } - - magic = header->v1.magic; - if(magic == COW_MAGIC) { - version = header->v1.version; - } - else if(magic == ntohl(COW_MAGIC)){ - version = ntohl(header->v1.version); - } - else goto out; - - *magic_out = COW_MAGIC; - - if(version == 1){ - if(n < sizeof(header->v1)){ - printk("read_cow_header - failed to read V1 header\n"); - goto out; - } - *mtime_out = header->v1.mtime; - *size_out = header->v1.size; - *sectorsize_out = header->v1.sectorsize; - *bitmap_offset_out = sizeof(header->v1); - file = header->v1.backing_file; - } - else if(version == 2){ - if(n < sizeof(header->v2)){ - printk("read_cow_header - failed to read V2 header\n"); - goto out; - } - *mtime_out = ntohl(header->v2.mtime); - *size_out = ntohll(header->v2.size); - *sectorsize_out = ntohl(header->v2.sectorsize); - *bitmap_offset_out = sizeof(header->v2); - file = header->v2.backing_file; - } - else { - printk("read_cow_header - invalid COW version\n"); - goto out; - } - err = -ENOMEM; - *backing_file_out = uml_strdup(file); - if(*backing_file_out == NULL){ - printk("read_cow_header - failed to allocate backing file\n"); - goto out; - } - err = 0; - out: - kfree(header); - return(err); -} static int same_backing_files(char *from_cmdline, char *from_cow, char *cow) { - struct stat buf1, buf2; + struct uml_stat buf1, buf2; + int err; if(from_cmdline == NULL) return(1); if(!strcmp(from_cmdline, from_cow)) return(1); - if(stat(from_cmdline, &buf1) < 0){ - printk("Couldn't stat '%s', errno = %d\n", from_cmdline, - errno); + err = os_stat_file(from_cmdline, &buf1); + if(err < 0){ + printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err); return(1); } - if(stat(from_cow, &buf2) < 0){ - printk("Couldn't stat '%s', errno = %d\n", from_cow, errno); + err = os_stat_file(from_cow, &buf2); + if(err < 0){ + printk("Couldn't stat '%s', err = %d\n", from_cow, -err); return(1); } - if((buf1.st_dev == buf2.st_dev) && (buf1.st_ino == buf2.st_ino)) + if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) return(1); printk("Backing file mismatch - \"%s\" requested,\n" @@ -174,20 +55,21 @@ static int same_backing_files(char *from static int backing_file_mismatch(char *file, __u64 size, time_t mtime) { - struct stat64 buf; + unsigned long modtime; long long actual; int err; - if(stat64(file, &buf) < 0){ - printk("Failed to stat backing file \"%s\", errno = %d\n", - file, errno); - return(-errno); + err = os_file_modtime(file, &modtime); + if(err < 0){ + printk("Failed to get modification time of backing file " + "\"%s\", err = %d\n", file, -err); + return(err); } err = os_file_size(file, &actual); - if(err){ + if(err < 0){ printk("Failed to get size of backing file \"%s\", " - "errno = %d\n", file, -err); + "err = %d\n", file, -err); return(err); } @@ -196,9 +78,9 @@ static int backing_file_mismatch(char *f "file\n", size, actual); return(-EINVAL); } - if(buf.st_mtime != mtime){ + if(modtime != mtime){ printk("mtime mismatch (%ld vs %ld) of COW header vs backing " - "file\n", mtime, buf.st_mtime); + "file\n", mtime, modtime); return(-EINVAL); } return(0); @@ -209,124 +91,16 @@ int read_cow_bitmap(int fd, void *buf, i int err; err = os_seek_file(fd, offset); - if(err != 0) return(-errno); - err = read(fd, buf, len); - if(err < 0) return(-errno); - return(0); -} + if(err < 0) + return(err); -static int absolutize(char *to, int size, char *from) -{ - char save_cwd[256], *slash; - int remaining; + err = os_read_file(fd, buf, len); + if(err < 0) + return(err); - if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { - printk("absolutize : unable to get cwd - errno = %d\n", errno); - return(-1); - } - slash = strrchr(from, '/'); - if(slash != NULL){ - *slash = '\0'; - if(chdir(from)){ - *slash = '/'; - printk("absolutize : Can't cd to '%s' - errno = %d\n", - from, errno); - return(-1); - } - *slash = '/'; - if(getcwd(to, size) == NULL){ - printk("absolutize : unable to get cwd of '%s' - " - "errno = %d\n", from, errno); - return(-1); - } - remaining = size - strlen(to); - if(strlen(slash) + 1 > remaining){ - printk("absolutize : unable to fit '%s' into %d " - "chars\n", from, size); - return(-1); - } - strcat(to, slash); - } - else { - if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ - printk("absolutize : unable to fit '%s' into %d " - "chars\n", from, size); - return(-1); - } - strcpy(to, save_cwd); - strcat(to, "/"); - strcat(to, from); - } - chdir(save_cwd); return(0); } -static int write_cow_header(char *cow_file, int fd, char *backing_file, - int sectorsize, long long *size) -{ - struct cow_header_v2 *header; - struct stat64 buf; - int err; - - err = os_seek_file(fd, 0); - if(err != 0){ - printk("write_cow_header - lseek failed, errno = %d\n", errno); - return(-errno); - } - - err = -ENOMEM; - header = um_kmalloc(sizeof(*header)); - if(header == NULL){ - printk("Failed to allocate COW V2 header\n"); - goto out; - } - header->magic = htonl(COW_MAGIC); - header->version = htonl(COW_VERSION); - - err = -EINVAL; - if(strlen(backing_file) > sizeof(header->backing_file) - 1){ - printk("Backing file name \"%s\" is too long - names are " - "limited to %d characters\n", backing_file, - sizeof(header->backing_file) - 1); - goto out_free; - } - - if(absolutize(header->backing_file, sizeof(header->backing_file), - backing_file)) - goto out_free; - - err = stat64(header->backing_file, &buf); - if(err < 0){ - printk("Stat of backing file '%s' failed, errno = %d\n", - header->backing_file, errno); - err = -errno; - goto out_free; - } - - err = os_file_size(header->backing_file, size); - if(err){ - printk("Couldn't get size of backing file '%s', errno = %d\n", - header->backing_file, -*size); - goto out_free; - } - - header->mtime = htonl(buf.st_mtime); - header->size = htonll(*size); - header->sectorsize = htonl(sectorsize); - - err = write(fd, header, sizeof(*header)); - if(err != sizeof(*header)){ - printk("Write of header to new COW file '%s' failed, " - "errno = %d\n", cow_file, errno); - goto out_free; - } - err = 0; - out_free: - kfree(header); - out: - return(err); -} - int open_ubd_file(char *file, struct openflags *openflags, char **backing_file_out, int *bitmap_offset_out, unsigned long *bitmap_len_out, int *data_offset_out, @@ -334,26 +108,36 @@ int open_ubd_file(char *file, struct ope { time_t mtime; __u64 size; + __u32 version, align; char *backing_file; - int fd, err, sectorsize, magic, same, mode = 0644; + int fd, err, sectorsize, same, mode = 0644; - if((fd = os_open_file(file, *openflags, mode)) < 0){ + fd = os_open_file(file, *openflags, mode); + if(fd < 0){ if((fd == -ENOENT) && (create_cow_out != NULL)) *create_cow_out = 1; if(!openflags->w || ((errno != EROFS) && (errno != EACCES))) return(-errno); openflags->w = 0; - if((fd = os_open_file(file, *openflags, mode)) < 0) + fd = os_open_file(file, *openflags, mode); + if(fd < 0) return(fd); } + + err = os_lock_file(fd, openflags->w); + if(err < 0){ + printk("Failed to lock '%s', err = %d\n", file, -err); + goto out_close; + } + if(backing_file_out == NULL) return(fd); - err = read_cow_header(fd, &magic, &backing_file, &mtime, &size, - §orsize, bitmap_offset_out); + err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, + &size, §orsize, &align, bitmap_offset_out); if(err && (*backing_file_out != NULL)){ printk("Failed to read COW header from COW file \"%s\", " - "errno = %d\n", file, err); - goto error; + "errno = %d\n", file, -err); + goto out_close; } if(err) return(fd); @@ -363,36 +147,33 @@ int open_ubd_file(char *file, struct ope if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){ printk("Switching backing file to '%s'\n", *backing_file_out); - err = write_cow_header(file, fd, *backing_file_out, - sectorsize, &size); + err = write_cow_header(file, fd, *backing_file_out, + sectorsize, align, &size); if(err){ - printk("Switch failed, errno = %d\n", err); + printk("Switch failed, errno = %d\n", -err); return(err); } } else { *backing_file_out = backing_file; err = backing_file_mismatch(*backing_file_out, size, mtime); - if(err) goto error; + if(err) goto out_close; } - sizes(size, sectorsize, *bitmap_offset_out, bitmap_len_out, - data_offset_out); + cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, + bitmap_len_out, data_offset_out); return(fd); - error: - close(fd); + out_close: + os_close_file(fd); return(err); } int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, - int sectorsize, int *bitmap_offset_out, + int sectorsize, int alignment, int *bitmap_offset_out, unsigned long *bitmap_len_out, int *data_offset_out) { - __u64 blocks; - long zero; - int err, fd, i; - long long size; + int err, fd; flags.c = 1; fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL); @@ -403,57 +184,49 @@ int create_cow_file(char *cow_file, char goto out; } - err = write_cow_header(cow_file, fd, backing_file, sectorsize, &size); - if(err) goto out_close; - - blocks = (size + sectorsize - 1) / sectorsize; - blocks = (blocks + sizeof(long) * 8 - 1) / (sizeof(long) * 8); - zero = 0; - for(i = 0; i < blocks; i++){ - err = write(fd, &zero, sizeof(zero)); - if(err != sizeof(zero)){ - printk("Write of bitmap to new COW file '%s' failed, " - "errno = %d\n", cow_file, errno); - goto out_close; - } - } - - sizes(size, sectorsize, sizeof(struct cow_header_v2), - bitmap_len_out, data_offset_out); - *bitmap_offset_out = sizeof(struct cow_header_v2); - - return(fd); - - out_close: - close(fd); + err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, + bitmap_offset_out, bitmap_len_out, + data_offset_out); + if(!err) + return(fd); + os_close_file(fd); out: return(err); } +/* XXX Just trivial wrappers around os_read_file and os_write_file */ int read_ubd_fs(int fd, void *buffer, int len) { - int n; - - n = read(fd, buffer, len); - if(n < 0) return(-errno); - else return(n); + return(os_read_file(fd, buffer, len)); } int write_ubd_fs(int fd, char *buffer, int len) { - int n; - - n = write(fd, buffer, len); - if(n < 0) return(-errno); - else return(n); + return(os_write_file(fd, buffer, len)); } -int ubd_is_dir(char *file) +static int update_bitmap(struct io_thread_req *req) { - struct stat64 buf; + int n; + + if(req->cow_offset == -1) + return(0); + + n = os_seek_file(req->fds[1], req->cow_offset); + if(n < 0){ + printk("do_io - bitmap lseek failed : err = %d\n", -n); + return(1); + } + + n = os_write_file(req->fds[1], &req->bitmap_words, + sizeof(req->bitmap_words)); + if(n != sizeof(req->bitmap_words)){ + printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, + req->fds[1]); + return(1); + } - if(stat64(file, &buf) < 0) return(0); - return(S_ISDIR(buf.st_mode)); + return(0); } void do_io(struct io_thread_req *req) @@ -461,8 +234,18 @@ void do_io(struct io_thread_req *req) char *buf; unsigned long len; int n, nsectors, start, end, bit; + int err; __u64 off; + if(req->op == UBD_MMAP){ + /* Touch the page to force the host to do any necessary IO to + * get it into memory + */ + n = *((volatile int *) req->buffer); + req->error = update_bitmap(req); + return; + } + nsectors = req->length / req->sectorsize; start = 0; do { @@ -473,15 +256,14 @@ void do_io(struct io_thread_req *req) &req->sector_mask) == bit)) end++; - if(end != nsectors) - printk("end != nsectors\n"); off = req->offset + req->offsets[bit] + start * req->sectorsize; len = (end - start) * req->sectorsize; buf = &req->buffer[start * req->sectorsize]; - if(os_seek_file(req->fds[bit], off) != 0){ - printk("do_io - lseek failed : errno = %d\n", errno); + err = os_seek_file(req->fds[bit], off); + if(err < 0){ + printk("do_io - lseek failed : err = %d\n", -err); req->error = 1; return; } @@ -490,11 +272,10 @@ void do_io(struct io_thread_req *req) do { buf = &buf[n]; len -= n; - n = read(req->fds[bit], buf, len); + n = os_read_file(req->fds[bit], buf, len); if (n < 0) { - printk("do_io - read returned %d : " - "errno = %d fd = %d\n", n, - errno, req->fds[bit]); + printk("do_io - read failed, err = %d " + "fd = %d\n", -n, req->fds[bit]); req->error = 1; return; } @@ -502,11 +283,10 @@ void do_io(struct io_thread_req *req) if (n < len) memset(&buf[n], 0, len - n); } else { - n = write(req->fds[bit], buf, len); + n = os_write_file(req->fds[bit], buf, len); if(n != len){ - printk("do_io - write returned %d : " - "errno = %d fd = %d\n", n, - errno, req->fds[bit]); + printk("do_io - write failed err = %d " + "fd = %d\n", -n, req->fds[bit]); req->error = 1; return; } @@ -515,24 +295,7 @@ void do_io(struct io_thread_req *req) start = end; } while(start < nsectors); - if(req->cow_offset != -1){ - if(os_seek_file(req->fds[1], req->cow_offset) != 0){ - printk("do_io - bitmap lseek failed : errno = %d\n", - errno); - req->error = 1; - return; - } - n = write(req->fds[1], &req->bitmap_words, - sizeof(req->bitmap_words)); - if(n != sizeof(req->bitmap_words)){ - printk("do_io - bitmap update returned %d : " - "errno = %d fd = %d\n", n, errno, req->fds[1]); - req->error = 1; - return; - } - } - req->error = 0; - return; + req->error = update_bitmap(req); } /* Changed in start_io_thread, which is serialized by being called only @@ -550,19 +313,23 @@ int io_thread(void *arg) signal(SIGWINCH, SIG_IGN); while(1){ - n = read(kernel_fd, &req, sizeof(req)); - if(n < 0) printk("io_thread - read returned %d, errno = %d\n", - n, errno); - else if(n < sizeof(req)){ - printk("io_thread - short read : length = %d\n", n); + n = os_read_file(kernel_fd, &req, sizeof(req)); + if(n != sizeof(req)){ + if(n < 0) + printk("io_thread - read failed, fd = %d, " + "err = %d\n", kernel_fd, -n); + else { + printk("io_thread - short read, fd = %d, " + "length = %d\n", kernel_fd, n); + } continue; } io_count++; do_io(&req); - n = write(kernel_fd, &req, sizeof(req)); + n = os_write_file(kernel_fd, &req, sizeof(req)); if(n != sizeof(req)) - printk("io_thread - write failed, errno = %d\n", - errno); + printk("io_thread - write failed, fd = %d, err = %d\n", + kernel_fd, -n); } } @@ -571,10 +338,11 @@ int start_io_thread(unsigned long sp, in int pid, fds[2], err; err = os_pipe(fds, 1, 1); - if(err){ - printk("start_io_thread - os_pipe failed, errno = %d\n", -err); - return(-1); + if(err < 0){ + printk("start_io_thread - os_pipe failed, err = %d\n", -err); + goto out; } + kernel_fd = fds[0]; *fd_out = fds[1]; @@ -582,32 +350,19 @@ int start_io_thread(unsigned long sp, in NULL); if(pid < 0){ printk("start_io_thread - clone failed : errno = %d\n", errno); - return(-errno); + goto out_close; } - return(pid); -} - -#ifdef notdef -int start_io_thread(unsigned long sp, int *fd_out) -{ - int pid; - if((kernel_fd = get_pty()) < 0) return(-1); - raw(kernel_fd, 0); - if((*fd_out = open(ptsname(kernel_fd), O_RDWR)) < 0){ - printk("Couldn't open tty for IO\n"); - return(-1); - } - - pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, - NULL); - if(pid < 0){ - printk("start_io_thread - clone failed : errno = %d\n", errno); - return(-errno); - } return(pid); + + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + kernel_fd = -1; + *fd_out = -1; + out: + return(err); } -#endif /* * Overrides for Emacs so that we follow Linus's tabbing style. --- linux-2.6.3-rc2/arch/um/drivers/xterm.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/drivers/xterm.c 2004-02-12 00:40:26.000000000 -0800 @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -36,7 +35,8 @@ void *xterm_init(char *str, int device, { struct xterm_chan *data; - if((data = malloc(sizeof(*data))) == NULL) return(NULL); + data = malloc(sizeof(*data)); + if(data == NULL) return(NULL); *data = ((struct xterm_chan) { .pid = -1, .helper_pid = -1, .device = device, @@ -93,7 +93,7 @@ int xterm_open(int input, int output, in "/usr/lib/uml/port-helper", "-uml-socket", file, NULL }; - if(access(argv[4], X_OK)) + if(os_access(argv[4], OS_ACC_X_OK) < 0) argv[4] = "port-helper"; fd = mkstemp(file); @@ -106,13 +106,13 @@ int xterm_open(int input, int output, in printk("xterm_open : unlink failed, errno = %d\n", errno); return(-errno); } - close(fd); + os_close_file(fd); - fd = create_unix_socket(file, sizeof(file)); + fd = os_create_unix_socket(file, sizeof(file), 1); if(fd < 0){ printk("xterm_open : create_unix_socket failed, errno = %d\n", -fd); - return(-fd); + return(fd); } sprintf(title, data->title, data->device); @@ -128,15 +128,16 @@ int xterm_open(int input, int output, in if(data->direct_rcv) new = os_rcv_fd(fd, &data->helper_pid); else { - if((err = os_set_fd_block(fd, 0)) != 0){ + err = os_set_fd_block(fd, 0); + if(err < 0){ printk("xterm_open : failed to set descriptor " - "non-blocking, errno = %d\n", err); + "non-blocking, err = %d\n", -err); return(err); } new = xterm_fd(fd, &data->helper_pid); } if(new < 0){ - printk("xterm_open : os_rcv_fd failed, errno = %d\n", -new); + printk("xterm_open : os_rcv_fd failed, err = %d\n", -new); goto out; } @@ -160,7 +161,7 @@ void xterm_close(int fd, void *d) if(data->helper_pid != -1) os_kill_process(data->helper_pid, 0); data->helper_pid = -1; - close(fd); + os_close_file(fd); } void xterm_free(void *d) --- linux-2.6.3-rc2/arch/um/drivers/xterm_kern.c 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/drivers/xterm_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -5,9 +5,12 @@ #include "linux/errno.h" #include "linux/slab.h" +#include "linux/signal.h" +#include "linux/interrupt.h" #include "asm/semaphore.h" #include "asm/irq.h" #include "irq_user.h" +#include "irq_kern.h" #include "kern_util.h" #include "os.h" #include "xterm.h" @@ -19,17 +22,18 @@ struct xterm_wait { int new_fd; }; -static void xterm_interrupt(int irq, void *data, struct pt_regs *regs) +static irqreturn_t xterm_interrupt(int irq, void *data, struct pt_regs *regs) { struct xterm_wait *xterm = data; int fd; fd = os_rcv_fd(xterm->fd, &xterm->pid); if(fd == -EAGAIN) - return; + return(IRQ_NONE); xterm->new_fd = fd; up(&xterm->sem); + return(IRQ_HANDLED); } int xterm_fd(int socket, int *pid_out) @@ -54,7 +58,8 @@ int xterm_fd(int socket, int *pid_out) if(err){ printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, " "err = %d\n", err); - return(err); + ret = err; + goto out; } down(&data->sem); @@ -62,6 +67,7 @@ int xterm_fd(int socket, int *pid_out) ret = data->new_fd; *pid_out = data->pid; + out: kfree(data); return(ret); --- linux-2.6.3-rc2/arch/um/dyn.lds.S 2003-06-14 12:18:21.000000000 -0700 +++ 25/arch/um/dyn.lds.S 2004-02-12 00:40:26.000000000 -0800 @@ -10,12 +10,15 @@ SECTIONS { . = START + SIZEOF_HEADERS; .interp : { *(.interp) } - . = ALIGN(4096); __binary_start = .; . = ALIGN(4096); /* Init code and data */ _stext = .; __init_begin = .; - .text.init : { *(.text.init) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } . = ALIGN(4096); @@ -67,7 +70,7 @@ SECTIONS #include "asm/common.lds.S" - .data.init : { *(.data.init) } + init.data : { *(.init.data) } /* Ensure the __preinit_array_start label is properly aligned. We could instead move the label definition inside the section, but --- linux-2.6.3-rc2/arch/um/include/2_5compat.h 2003-06-14 12:18:09.000000000 -0700 +++ 25/arch/um/include/2_5compat.h 2004-02-12 00:40:26.000000000 -0800 @@ -6,20 +6,6 @@ #ifndef __2_5_COMPAT_H__ #define __2_5_COMPAT_H__ -#include "linux/version.h" - -#define INIT_CONSOLE(dev_name, write_proc, device_proc, setup_proc, f) { \ - name : dev_name, \ - write : write_proc, \ - read : NULL, \ - device : device_proc, \ - setup : setup_proc, \ - flags : f, \ - index : -1, \ - cflag : 0, \ - next : NULL \ -} - #define INIT_HARDSECT(arr, maj, sizes) #define SET_PRI(task) do ; while(0) --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/include/irq_kern.h 2004-02-12 00:40:26.000000000 -0800 @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __IRQ_KERN_H__ +#define __IRQ_KERN_H__ + +#include "linux/interrupt.h" + +extern int um_request_irq(unsigned int irq, int fd, int type, + irqreturn_t (*handler)(int, void *, + struct pt_regs *), + unsigned long irqflags, const char * devname, + void *dev_id); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.3-rc2/arch/um/include/kern_util.h 2003-06-14 12:18:07.000000000 -0700 +++ 25/arch/um/include/kern_util.h 2004-02-12 00:40:57.000000000 -0800 @@ -45,7 +45,6 @@ extern int page_mask(void); extern int need_finish_fork(void); extern void free_stack(unsigned long stack, int order); extern void add_input_request(int op, void (*proc)(int), void *arg); -extern int sys_execve(char *file, char **argv, char **env); extern char *current_cmd(void); extern void timer_handler(int sig, union uml_pt_regs *regs); extern int set_signals(int enable); @@ -63,10 +62,9 @@ extern void init_flush_vm(void); extern void *syscall_sp(void *t); extern void syscall_trace(void); extern int hz(void); -extern void idle_timer(void); +extern void uml_idle_timer(void); extern unsigned int do_IRQ(int irq, union uml_pt_regs *regs); extern int external_pid(void *t); -extern int pid_to_processor_id(int pid); extern void boot_timer_handler(int sig); extern void interrupt_end(void); extern void initial_thread_cb(void (*proc)(void *), void *arg); @@ -90,9 +88,7 @@ extern int remove_gdb(void); extern char *uml_strdup(char *string); extern void unprotect_kernel_mem(void); extern void protect_kernel_mem(void); -extern void set_kmem_end(unsigned long); extern void uml_cleanup(void); -extern int pid_to_processor_id(int pid); extern void set_current(void *t); extern void lock_signalled_task(void *t); extern void IPI_handler(int cpu); @@ -101,7 +97,9 @@ extern void *get_init_task(void); extern int clear_user_proc(void *buf, int size); extern int copy_to_user_proc(void *to, void *from, int size); extern int copy_from_user_proc(void *to, void *from, int size); +extern int strlen_user_proc(char *str); extern void bus_handler(int sig, union uml_pt_regs *regs); +extern void winch(int sig, union uml_pt_regs *regs); extern long execute_syscall(void *r); extern int smp_sigio_handler(void); extern void *get_current(void); @@ -112,6 +110,8 @@ extern void arch_switch(void); extern void free_irq(unsigned int, void *); extern int um_in_interrupt(void); extern int cpu(void); +extern unsigned long long time_stamp(void); + #endif /* --- linux-2.6.3-rc2/arch/um/include/line.h 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/um/include/line.h 2004-02-12 00:40:26.000000000 -0800 @@ -9,12 +9,14 @@ #include "linux/list.h" #include "linux/workqueue.h" #include "linux/tty.h" +#include "linux/interrupt.h" #include "asm/semaphore.h" #include "chan_user.h" #include "mconsole_kern.h" struct line_driver { char *name; + char *device_name; char *devfs_name; short major; short minor_start; @@ -67,8 +69,6 @@ struct lines { #define LINES_INIT(n) { num : n } -extern void line_interrupt(int irq, void *data, struct pt_regs *unused); -extern void line_write_interrupt(int irq, void *data, struct pt_regs *unused); extern void line_close(struct line *lines, struct tty_struct *tty); extern int line_open(struct line *lines, struct tty_struct *tty, struct chan_opts *opts); --- linux-2.6.3-rc2/arch/um/include/mconsole.h 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/um/include/mconsole.h 2004-02-12 00:40:26.000000000 -0800 @@ -41,11 +41,13 @@ struct mconsole_notify { struct mc_request; +enum mc_context { MCONSOLE_INTR, MCONSOLE_PROC }; + struct mconsole_command { char *command; void (*handler)(struct mc_request *req); - int as_interrupt; + enum mc_context context; }; struct mc_request @@ -77,6 +79,8 @@ extern void mconsole_sysrq(struct mc_req extern void mconsole_cad(struct mc_request *req); extern void mconsole_stop(struct mc_request *req); extern void mconsole_go(struct mc_request *req); +extern void mconsole_log(struct mc_request *req); +extern void mconsole_proc(struct mc_request *req); extern int mconsole_get_request(int fd, struct mc_request *req); extern int mconsole_notify(char *sock_name, int type, const void *data, --- linux-2.6.3-rc2/arch/um/include/mem.h 2003-06-14 12:18:51.000000000 -0700 +++ 25/arch/um/include/mem.h 2004-02-12 00:40:26.000000000 -0800 @@ -1,19 +1,17 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002, 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ #ifndef __MEM_H__ #define __MEM_H__ -struct vm_reserved { - struct list_head list; - unsigned long start; - unsigned long end; -}; +#include "linux/types.h" -extern void set_usable_vm(unsigned long start, unsigned long end); -extern void set_kmem_end(unsigned long new); +extern int phys_mapping(unsigned long phys, __u64 *offset_out); +extern int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w); +extern int is_remapped(void *virt); +extern int physmem_remove_mapping(void *virt); #endif --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/include/mem_kern.h 2004-02-12 00:40:26.000000000 -0800 @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __MEM_KERN_H__ +#define __MEM_KERN_H__ + +#include "linux/list.h" +#include "linux/types.h" + +struct remapper { + struct list_head list; + int (*proc)(int, unsigned long, int, __u64); +}; + +extern void register_remapper(struct remapper *info); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.3-rc2/arch/um/include/mem_user.h 2003-06-14 12:18:25.000000000 -0700 +++ 25/arch/um/include/mem_user.h 2004-02-12 00:40:26.000000000 -0800 @@ -32,43 +32,38 @@ #ifndef _MEM_USER_H #define _MEM_USER_H -struct mem_region { +struct iomem_region { + struct iomem_region *next; char *driver; - unsigned long start_pfn; - unsigned long start; - unsigned long len; - void *mem_map; int fd; + int size; + unsigned long phys; + unsigned long virt; }; -extern struct mem_region *regions[]; -extern struct mem_region physmem_region; +extern struct iomem_region *iomem_regions; +extern int iomem_size; #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) extern unsigned long host_task_size; extern unsigned long task_size; +extern void check_devanon(void); extern int init_mem_user(void); extern int create_mem_file(unsigned long len); -extern void setup_range(int fd, char *driver, unsigned long start, - unsigned long pfn, unsigned long total, int need_vm, - struct mem_region *region, void *reserved); extern void setup_memory(void *entry); extern unsigned long find_iomem(char *driver, unsigned long *len_out); -extern int init_maps(struct mem_region *region); -extern int nregions(void); -extern int reserve_vm(unsigned long start, unsigned long end, void *e); +extern int init_maps(unsigned long physmem, unsigned long iomem, + unsigned long highmem); extern unsigned long get_vm(unsigned long len); extern void setup_physmem(unsigned long start, unsigned long usable, - unsigned long len); -extern int setup_region(struct mem_region *region, void *entry); + unsigned long len, unsigned long highmem); extern void add_iomem(char *name, int fd, unsigned long size); -extern struct mem_region *phys_region(unsigned long phys); extern unsigned long phys_offset(unsigned long phys); extern void unmap_physmem(void); -extern int map_memory(unsigned long virt, unsigned long phys, - unsigned long len, int r, int w, int x); +extern void map_memory(unsigned long virt, unsigned long phys, + unsigned long len, int r, int w, int x); extern int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x, int must_succeed); extern unsigned long get_kmem_end(void); --- linux-2.6.3-rc2/arch/um/include/os.h 2003-06-14 12:18:02.000000000 -0700 +++ 25/arch/um/include/os.h 2004-02-12 00:40:26.000000000 -0800 @@ -17,6 +17,32 @@ #define OS_TYPE_FIFO 6 #define OS_TYPE_SOCK 7 +/* os_access() flags */ +#define OS_ACC_F_OK 0 /* Test for existence. */ +#define OS_ACC_X_OK 1 /* Test for execute permission. */ +#define OS_ACC_W_OK 2 /* Test for write permission. */ +#define OS_ACC_R_OK 4 /* Test for read permission. */ +#define OS_ACC_RW_OK (OS_ACC_W_OK | OS_ACC_R_OK) /* Test for RW permission */ + +/* + * types taken from stat_file() in hostfs_user.c + * (if they are wrong here, they are wrong there...). + */ +struct uml_stat { + int ust_dev; /* device */ + unsigned long long ust_ino; /* inode */ + int ust_mode; /* protection */ + int ust_nlink; /* number of hard links */ + int ust_uid; /* user ID of owner */ + int ust_gid; /* group ID of owner */ + unsigned long long ust_size; /* total size, in bytes */ + int ust_blksize; /* blocksize for filesystem I/O */ + unsigned long long ust_blocks; /* number of blocks allocated */ + unsigned long ust_atime; /* time of last access */ + unsigned long ust_mtime; /* time of last modification */ + unsigned long ust_ctime; /* time of last change */ +}; + struct openflags { unsigned int r : 1; unsigned int w : 1; @@ -84,29 +110,47 @@ static inline struct openflags of_excl(s flags.e = 1; return(flags); } - + static inline struct openflags of_cloexec(struct openflags flags) { flags.cl = 1; return(flags); } +extern int os_stat_file(const char *file_name, struct uml_stat *buf); +extern int os_stat_fd(const int fd, struct uml_stat *buf); +extern int os_access(const char *file, int mode); +extern void os_print_error(int error, const char* str); +extern int os_get_exec_close(int fd, int *close_on_exec); +extern int os_set_exec_close(int fd, int close_on_exec); +extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg); +extern int os_window_size(int fd, int *rows, int *cols); +extern int os_new_tty_pgrp(int fd, int pid); +extern int os_get_ifname(int fd, char *namebuf); +extern int os_set_slip(int fd); +extern int os_set_owner(int fd, int pid); +extern int os_sigio_async(int master, int slave); +extern int os_mode_fd(int fd, int mode); + extern int os_seek_file(int fd, __u64 offset); extern int os_open_file(char *file, struct openflags flags, int mode); extern int os_read_file(int fd, void *buf, int len); -extern int os_write_file(int fd, void *buf, int count); +extern int os_write_file(int fd, const void *buf, int count); extern int os_file_size(char *file, long long *size_out); +extern int os_file_modtime(char *file, unsigned long *modtime); extern int os_pipe(int *fd, int stream, int close_on_exec); extern int os_set_fd_async(int fd, int owner); extern int os_set_fd_block(int fd, int blocking); extern int os_accept_connection(int fd); +extern int os_create_unix_socket(char *file, int len, int close_on_exec); extern int os_shutdown_socket(int fd, int r, int w); extern void os_close_file(int fd); extern int os_rcv_fd(int fd, int *helper_pid_out); -extern int create_unix_socket(char *file, int len); +extern int create_unix_socket(char *file, int len, int close_on_exec); extern int os_connect_socket(char *name); extern int os_file_type(char *file); extern int os_file_mode(char *file, struct openflags *mode_out); +extern int os_lock_file(int fd, int excl); extern unsigned long os_process_pc(int pid); extern int os_process_parent(int pid); @@ -115,11 +159,12 @@ extern void os_kill_process(int pid, int extern void os_usr1_process(int pid); extern int os_getpid(void); -extern int os_map_memory(void *virt, int fd, unsigned long off, +extern int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x); extern int os_protect_memory(void *addr, unsigned long len, int r, int w, int x); extern int os_unmap_memory(void *addr, int len); +extern void os_flush_stdout(void); #endif --- linux-2.6.3-rc2/arch/um/include/skas_ptrace.h 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/um/include/skas_ptrace.h 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ --- linux-2.6.3-rc2/arch/um/include/sysdep-i386/frame_user.h 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/um/include/sysdep-i386/frame_user.h 2004-02-12 00:40:26.000000000 -0800 @@ -56,26 +56,26 @@ static inline void setup_arch_frame(stru * it would have to be __builtin_frame_address(1). */ -static inline unsigned long frame_restorer(void) -{ - unsigned long *fp; - - fp = __builtin_frame_address(0); - return((unsigned long) (fp + 1)); -} +#define frame_restorer() \ +({ \ + unsigned long *fp; \ +\ + fp = __builtin_frame_address(0); \ + ((unsigned long) (fp + 1)); \ +}) /* Similarly, this returns the value of sp when the handler was first * entered. This is used to calculate the proper sp when delivering * signals. */ -static inline unsigned long frame_sp(void) -{ - unsigned long *fp; - - fp = __builtin_frame_address(0); - return((unsigned long) (fp + 1)); -} +#define frame_sp() \ +({ \ + unsigned long *fp; \ +\ + fp = __builtin_frame_address(0); \ + ((unsigned long) (fp + 1)); \ +}) #endif --- linux-2.6.3-rc2/arch/um/include/sysdep-i386/sigcontext.h 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/include/sysdep-i386/sigcontext.h 2004-02-12 00:40:26.000000000 -0800 @@ -28,8 +28,8 @@ */ #define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0) -/* These are General Protection and Page Fault */ -#define SEGV_IS_FIXABLE(trap) ((trap == 13) || (trap == 14)) +/* This is Page Fault */ +#define SEGV_IS_FIXABLE(trap) (trap == 14) #define SC_SEGV_IS_FIXABLE(sc) (SEGV_IS_FIXABLE(SC_TRAPNO(sc))) --- linux-2.6.3-rc2/arch/um/include/ubd_user.h 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/um/include/ubd_user.h 2004-02-12 00:40:26.000000000 -0800 @@ -9,7 +9,7 @@ #include "os.h" -enum ubd_req { UBD_READ, UBD_WRITE }; +enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP }; struct io_thread_req { enum ubd_req op; @@ -20,8 +20,10 @@ struct io_thread_req { char *buffer; int sectorsize; unsigned long sector_mask; - unsigned long cow_offset; + unsigned long long cow_offset; unsigned long bitmap_words[2]; + int map_fd; + unsigned long long map_offset; int error; }; @@ -31,7 +33,7 @@ extern int open_ubd_file(char *file, str int *create_cow_out); extern int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, int sectorsize, - int *bitmap_offset_out, + int alignment, int *bitmap_offset_out, unsigned long *bitmap_len_out, int *data_offset_out); extern int read_cow_bitmap(int fd, void *buf, int offset, int len); @@ -39,7 +41,6 @@ extern int read_ubd_fs(int fd, void *buf extern int write_ubd_fs(int fd, char *buffer, int len); extern int start_io_thread(unsigned long sp, int *fds_out); extern void do_io(struct io_thread_req *req); -extern int ubd_is_dir(char *file); static inline int ubd_test_bit(__u64 bit, unsigned char *data) { --- linux-2.6.3-rc2/arch/um/include/um_uaccess.h 2003-06-14 12:18:25.000000000 -0700 +++ 25/arch/um/include/um_uaccess.h 2004-02-12 00:40:26.000000000 -0800 @@ -38,22 +38,73 @@ static inline int copy_to_user(void *to, from, n)); } +/* + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ + static inline int strncpy_from_user(char *dst, const char *src, int count) { return(CHOOSE_MODE_PROC(strncpy_from_user_tt, strncpy_from_user_skas, dst, src, count)); } +/* + * __clear_user: - Zero a block of memory in user space, with less checking. + * @to: Destination address, in user space. + * @n: Number of bytes to zero. + * + * Zero a block of memory in user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ static inline int __clear_user(void *mem, int len) { return(CHOOSE_MODE_PROC(__clear_user_tt, __clear_user_skas, mem, len)); } +/* + * clear_user: - Zero a block of memory in user space. + * @to: Destination address, in user space. + * @n: Number of bytes to zero. + * + * Zero a block of memory in user space. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ static inline int clear_user(void *mem, int len) { return(CHOOSE_MODE_PROC(clear_user_tt, clear_user_skas, mem, len)); } +/* + * strlen_user: - Get the size of a string in user space. + * @str: The string to measure. + * @n: The maximum valid length + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * On exception, returns 0. + * If the string is too long, returns a value greater than @n. + */ static inline int strnlen_user(const void *str, int len) { return(CHOOSE_MODE_PROC(strnlen_user_tt, strnlen_user_skas, str, len)); --- linux-2.6.3-rc2/arch/um/include/user.h 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/include/user.h 2004-02-12 00:40:26.000000000 -0800 @@ -14,6 +14,7 @@ extern void *um_kmalloc_atomic(int size) extern void kfree(void *ptr); extern int in_aton(char *str); extern int open_gdb_chan(void); +extern int strlcpy(char *, const char *, int); #endif --- linux-2.6.3-rc2/arch/um/include/user_util.h 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/um/include/user_util.h 2004-02-12 00:40:26.000000000 -0800 @@ -14,8 +14,6 @@ extern int grantpt(int __fd); extern int unlockpt(int __fd); extern char *ptsname(int __fd); -enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; - struct cpu_task { int pid; void *task; @@ -59,7 +57,6 @@ extern int wait_for_stop(int pid, int si extern void *add_signal_handler(int sig, void (*handler)(int)); extern int start_fork_tramp(void *arg, unsigned long temp_stack, int clone_flags, int (*tramp)(void *)); -extern int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags); extern int linux_main(int argc, char **argv); extern void set_cmdline(char *cmd); extern void input_cb(void (*proc)(void *), void *arg, int arg_len); @@ -86,11 +83,13 @@ extern void check_sigio(void); extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr); extern void write_sigio_workaround(void); extern void arch_check_bugs(void); +extern int cpu_feature(char *what, char *buf, int len); extern int arch_handle_signal(int sig, union uml_pt_regs *regs); extern int arch_fixup(unsigned long address, void *sc_ptr); extern void forward_pending_sigio(int target); extern int can_do_skas(void); - +extern void arch_init_thread(void); + #endif /* --- linux-2.6.3-rc2/arch/um/Kconfig 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/um/Kconfig 2004-02-12 00:40:26.000000000 -0800 @@ -61,6 +61,20 @@ config MODE_SKAS config NET bool "Networking support" + help + Unless you really know what you are doing, you should say Y here. + The reason is that some programs need kernel networking support even + when running on a stand-alone machine that isn't connected to any + other computer. If you are upgrading from an older kernel, you + should consider updating your networking tools too because changes + in the kernel and the tools often go hand in hand. The tools are + contained in the package net-tools, the location and version number + of which are given in Documentation/Changes. + + For a general introduction to Linux networking, it is highly + recommended to read the NET-HOWTO, available from + . + source "fs/Kconfig.binfmt" @@ -85,6 +99,19 @@ config HOSTFS If you'd like to be able to work with files stored on the host, say Y or M here; otherwise say N. +config HPPFS + tristate "HoneyPot ProcFS" + help + hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc + entries to be overridden, removed, or fabricated from the host. + Its purpose is to allow a UML to appear to be a physical machine + by removing or changing anything in /proc which gives away the + identity of a UML. + + See http://user-mode-linux.sf.net/hppfs.html for more information. + + You only need this if you are setting up a UML honeypot. Otherwise, + it is safe to say 'N' here. config MCONSOLE bool "Management console" @@ -105,6 +132,16 @@ config MCONSOLE config MAGIC_SYSRQ bool "Magic SysRq key" depends on MCONSOLE + help + If you say Y here, you will have some control over the system even + if the system crashes for example during kernel debugging (e.g., you + will be able to flush the buffer cache to disk, reboot the system + immediately or dump some status information). This is accomplished + by pressing various keys while holding SysRq (Alt+PrintScreen). It + also works on a serial console (on PC hardware at least), if you + send a BREAK and then within 5 seconds a command keypress. The + keys are documented in Documentation/sysrq.txt. Don't say Y + unless you really know what this hack does. config HOST_2G_2G bool "2G/2G host address space split" @@ -160,6 +197,9 @@ config KERNEL_HALF_GIGS config HIGHMEM bool "Highmem support" +config PROC_MM + bool "/proc/mm support" + config KERNEL_STACK_ORDER int "Kernel stack size order" default 2 @@ -168,6 +208,17 @@ config KERNEL_STACK_ORDER be 1 << order pages. The default is OK unless you're running Valgrind on UML, in which case, set this to 3. +config UML_REAL_TIME_CLOCK + bool "Real-time Clock" + default y + help + This option makes UML time deltas match wall clock deltas. This should + normally be enabled. The exception would be if you are debugging with + UML and spend long times with UML stopped at a breakpoint. In this + case, when UML is restarted, it will call the timer enough times to make + up for the time spent at the breakpoint. This could result in a + noticable lag. If this is a problem, then disable this option. + endmenu source "init/Kconfig" @@ -240,6 +291,10 @@ config FRAME_POINTER config PT_PROXY bool "Enable ptrace proxy" depends on XTERM_CHAN && DEBUG_INFO + help + This option enables a debugging interface which allows gdb to debug + the kernel without needing to actually attach to kernel threads. + If you want to do kernel debugging, say Y here; otherwise say N. config GPROF bool "Enable gprof support" --- linux-2.6.3-rc2/arch/um/Kconfig_block 2003-06-14 12:18:25.000000000 -0700 +++ 25/arch/um/Kconfig_block 2004-02-12 00:40:26.000000000 -0800 @@ -29,6 +29,20 @@ config BLK_DEV_UBD_SYNC wise choice too. In all other cases (for example, if you're just playing around with User-Mode Linux) you can choose N. +# Turn this back on when the driver actually works +# +#config BLK_DEV_COW +# tristate "COW block device" +# help +# This is a layered driver which sits above two other block devices. +# One is read-only, and the other is a read-write layer which stores +# all changes. This provides the illusion that the read-only layer +# can be mounted read-write and changed. + +config BLK_DEV_COW_COMMON + bool + default BLK_DEV_COW || BLK_DEV_UBD + config BLK_DEV_LOOP tristate "Loopback device support" --- linux-2.6.3-rc2/arch/um/Kconfig_net 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/um/Kconfig_net 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,5 @@ -menu "Network Devices" +menu "UML Network Devices" depends on NET # UML virtual driver @@ -176,73 +176,5 @@ config UML_NET_SLIRP Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" - -# Below are hardware-independent drivers mirrored from -# drivers/net/Config.in. It would be nice if Linux -# had HW independent drivers separated from the other -# but it does not. Until then each non-ISA/PCI arch -# needs to provide it's own menu of network drivers -config DUMMY - tristate "Dummy net driver support" - -config BONDING - tristate "Bonding driver support" - -config EQUALIZER - tristate "EQL (serial line load balancing) support" - -config TUN - tristate "Universal TUN/TAP device driver support" - -config ETHERTAP - tristate "Ethertap network tap (OBSOLETE)" - depends on EXPERIMENTAL && NETLINK - -config PPP - tristate "PPP (point-to-point protocol) support" - -config PPP_MULTILINK - bool "PPP multilink support (EXPERIMENTAL)" - depends on PPP && EXPERIMENTAL - -config PPP_FILTER - bool "PPP filtering" - depends on PPP && FILTER - -config PPP_ASYNC - tristate "PPP support for async serial ports" - depends on PPP - -config PPP_SYNC_TTY - tristate "PPP support for sync tty ports" - depends on PPP - -config PPP_DEFLATE - tristate "PPP Deflate compression" - depends on PPP - -config PPP_BSDCOMP - tristate "PPP BSD-Compress compression" - depends on PPP - -config PPPOE - tristate "PPP over Ethernet (EXPERIMENTAL)" - depends on PPP && EXPERIMENTAL - -config SLIP - tristate "SLIP (serial line) support" - -config SLIP_COMPRESSED - bool "CSLIP compressed headers" - depends on SLIP=y - -config SLIP_SMART - bool "Keepalive and linefill" - depends on SLIP=y - -config SLIP_MODE_SLIP6 - bool "Six bit SLIP encapsulation" - depends on SLIP=y - endmenu --- linux-2.6.3-rc2/arch/um/kernel/config.c.in 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/kernel/config.c.in 2004-02-12 00:40:26.000000000 -0800 @@ -7,9 +7,7 @@ #include #include "init.h" -static __initdata char *config = " -CONFIG -"; +static __initdata char *config = "CONFIG"; static int __init print_config(char *line, int *add) { --- linux-2.6.3-rc2/arch/um/kernel/exec_kern.c 2003-06-14 12:18:05.000000000 -0700 +++ 25/arch/um/kernel/exec_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -32,10 +32,15 @@ void start_thread(struct pt_regs *regs, CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp); } +extern void log_exec(char **argv, void *tty); + static int execve1(char *file, char **argv, char **env) { int error; +#ifdef CONFIG_TTY_LOG + log_exec(argv, current->tty); +#endif error = do_execve(file, argv, env, ¤t->thread.regs); if (error == 0){ current->ptrace &= ~PT_DTRACE; --- linux-2.6.3-rc2/arch/um/kernel/frame.c 2003-06-14 12:18:07.000000000 -0700 +++ 25/arch/um/kernel/frame.c 2004-02-12 00:40:26.000000000 -0800 @@ -279,7 +279,7 @@ void capture_signal_stack(void) struct sc_frame_raw raw_sc; struct si_frame_raw raw_si; void *stack, *sigstack; - unsigned long top, sig_top, base; + unsigned long top, base; stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); @@ -292,7 +292,6 @@ void capture_signal_stack(void) } top = (unsigned long) stack + PAGE_SIZE - sizeof(void *); - sig_top = (unsigned long) sigstack + PAGE_SIZE; /* Get the sigcontext, no sigrestorer layout */ raw_sc.restorer = 0; --- linux-2.6.3-rc2/arch/um/kernel/frame_kern.c 2003-06-14 12:18:52.000000000 -0700 +++ 25/arch/um/kernel/frame_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -6,7 +6,6 @@ #include "asm/ptrace.h" #include "asm/uaccess.h" #include "asm/signal.h" -#include "asm/uaccess.h" #include "asm/ucontext.h" #include "frame_kern.h" #include "sigcontext.h" @@ -29,12 +28,15 @@ static int copy_restorer(void (*restorer sizeof(restorer))); } +extern int userspace_pid[]; + static int copy_sc_to_user(void *to, void *fp, struct pt_regs *from, struct arch_frame_data *arch) { return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs), arch), - copy_sc_to_user_skas(to, fp, &from->regs, + copy_sc_to_user_skas(userspace_pid[0], to, fp, + &from->regs, current->thread.cr2, current->thread.err))); } --- linux-2.6.3-rc2/arch/um/kernel/helper.c 2003-06-14 12:18:20.000000000 -0700 +++ 25/arch/um/kernel/helper.c 2004-02-12 00:40:26.000000000 -0800 @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -33,6 +32,7 @@ static int helper_child(void *arg) { struct helper_data *data = arg; char **argv = data->argv; + int errval; if(helper_pause){ signal(SIGHUP, helper_hup); @@ -41,8 +41,9 @@ static int helper_child(void *arg) if(data->pre_exec != NULL) (*data->pre_exec)(data->pre_data); execvp(argv[0], argv); + errval = errno; printk("execvp of '%s' failed - errno = %d\n", argv[0], errno); - write(data->fd, &errno, sizeof(errno)); + os_write_file(data->fd, &errval, sizeof(errval)); os_kill_process(os_getpid(), 0); return(0); } @@ -59,17 +60,20 @@ int run_helper(void (*pre_exec)(void *), if((stack_out != NULL) && (*stack_out != 0)) stack = *stack_out; else stack = alloc_stack(0, um_in_interrupt()); - if(stack == 0) return(-ENOMEM); + if(stack == 0) + return(-ENOMEM); err = os_pipe(fds, 1, 0); - if(err){ - printk("run_helper : pipe failed, errno = %d\n", -err); - return(err); + if(err < 0){ + printk("run_helper : pipe failed, err = %d\n", -err); + goto out_free; } - if(fcntl(fds[1], F_SETFD, 1) != 0){ - printk("run_helper : setting FD_CLOEXEC failed, errno = %d\n", - errno); - return(-errno); + + err = os_set_exec_close(fds[1], 1); + if(err < 0){ + printk("run_helper : setting FD_CLOEXEC failed, err = %d\n", + -err); + goto out_close; } sp = stack + page_size() - sizeof(void *); @@ -80,23 +84,34 @@ int run_helper(void (*pre_exec)(void *), pid = clone(helper_child, (void *) sp, CLONE_VM | SIGCHLD, &data); if(pid < 0){ printk("run_helper : clone failed, errno = %d\n", errno); - return(-errno); + err = -errno; + goto out_close; } - close(fds[1]); - n = read(fds[0], &err, sizeof(err)); + + os_close_file(fds[1]); + n = os_read_file(fds[0], &err, sizeof(err)); if(n < 0){ - printk("run_helper : read on pipe failed, errno = %d\n", - errno); - return(-errno); + printk("run_helper : read on pipe failed, err = %d\n", -n); + err = n; + goto out_kill; } else if(n != 0){ waitpid(pid, NULL, 0); - pid = -err; + pid = -errno; } if(stack_out == NULL) free_stack(stack, 0); else *stack_out = stack; return(pid); + + out_kill: + os_kill_process(pid, 1); + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + out_free: + free_stack(stack, 0); + return(err); } int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, @@ -117,9 +132,11 @@ int run_helper_thread(int (*proc)(void * } if(stack_out == NULL){ pid = waitpid(pid, &status, 0); - if(pid < 0) + if(pid < 0){ printk("run_helper_thread - wait failed, errno = %d\n", - pid); + errno); + pid = -errno; + } if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) printk("run_helper_thread - thread returned status " "0x%x\n", status); --- linux-2.6.3-rc2/arch/um/kernel/initrd_user.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/kernel/initrd_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -6,7 +6,6 @@ #include #include #include -#include #include #include "user_util.h" @@ -19,13 +18,15 @@ int load_initrd(char *filename, void *bu { int fd, n; - if((fd = os_open_file(filename, of_read(OPENFLAGS()), 0)) < 0){ - printk("Opening '%s' failed - errno = %d\n", filename, errno); + fd = os_open_file(filename, of_read(OPENFLAGS()), 0); + if(fd < 0){ + printk("Opening '%s' failed - err = %d\n", filename, -fd); return(-1); } - if((n = read(fd, buf, size)) != size){ - printk("Read of %d bytes from '%s' returned %d, errno = %d\n", - size, filename, n, errno); + n = os_read_file(fd, buf, size); + if(n != size){ + printk("Read of %d bytes from '%s' failed, err = %d\n", size, + filename, -n); return(-1); } return(0); --- linux-2.6.3-rc2/arch/um/kernel/init_task.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/init_task.c 2004-02-12 00:40:26.000000000 -0800 @@ -8,7 +8,6 @@ #include "linux/module.h" #include "linux/sched.h" #include "linux/init_task.h" -#include "linux/version.h" #include "asm/uaccess.h" #include "asm/pgtable.h" #include "user_util.h" @@ -18,7 +17,7 @@ static struct fs_struct init_fs = INIT_F struct mm_struct init_mm = INIT_MM(init_mm); static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); - +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); EXPORT_SYMBOL(init_mm); /* @@ -43,26 +42,12 @@ union thread_union init_thread_union __attribute__((__section__(".data.init_task"))) = { INIT_THREAD_INFO(init_task) }; -struct task_struct *alloc_task_struct(void) -{ - return((struct task_struct *) - __get_free_pages(GFP_KERNEL, CONFIG_KERNEL_STACK_ORDER)); -} - void unprotect_stack(unsigned long stack) { protect_memory(stack, (1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE, 1, 1, 0, 1); } -void free_task_struct(struct task_struct *task) -{ - /* free_pages decrements the page counter and only actually frees - * the pages if they are now not accessed by anything. - */ - free_pages((unsigned long) task, CONFIG_KERNEL_STACK_ORDER); -} - /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- linux-2.6.3-rc2/arch/um/kernel/irq.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/um/kernel/irq.c 2004-02-12 00:41:09.000000000 -0800 @@ -29,6 +29,7 @@ #include "user_util.h" #include "kern_util.h" #include "irq_user.h" +#include "irq_kern.h" static void register_irq_proc (unsigned int irq); @@ -83,65 +84,55 @@ struct hw_interrupt_type no_irq_type = { end_none }; -/* Not changed */ -volatile unsigned long irq_err_count; - /* * Generic, controller-independent functions: */ -int get_irq_list(char *buf) +int show_interrupts(struct seq_file *p, void *v) { - int i, j; - unsigned long flags; + int i = *(loff_t *) v, j; struct irqaction * action; - char *p = buf; + unsigned long flags; - p += sprintf(p, " "); - for (j=0; jtypename); - p += sprintf(p, " %s", action->name); + seq_printf(p, " %14s", irq_desc[i].handler->typename); + seq_printf(p, " %s", action->name); for (action=action->next; action; action = action->next) - p += sprintf(p, ", %s", action->name); - *p++ = '\n'; - end: + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); +skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } else if (i == NR_IRQS) { + seq_printf(p, "NMI: "); + for (j = 0; j < NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "%10u ", nmi_count(j)); + seq_putc(p, '\n'); } - p += sprintf(p, "\n"); -#ifdef notdef -#ifdef CONFIG_SMP - p += sprintf(p, "LOC: "); - for (j = 0; j < num_online_cpus(); j++) - p += sprintf(p, "%10u ", - apic_timer_irqs[cpu_logical_map(j)]); - p += sprintf(p, "\n"); -#endif -#endif - p += sprintf(p, "ERR: %10lu\n", irq_err_count); - return p - buf; -} - -int show_interrupts(struct seq_file *p, void *v) -{ - return(0); + return 0; } /* @@ -230,8 +221,11 @@ inline void synchronize_irq(unsigned int void disable_irq(unsigned int irq) { + irq_desc_t *desc = irq_desc + irq; + disable_irq_nosync(irq); - synchronize_irq(irq); + if(desc->action) + synchronize_irq(irq); } /** @@ -252,7 +246,7 @@ void enable_irq(unsigned int irq) spin_lock_irqsave(&desc->lock, flags); switch (desc->depth) { case 1: { - unsigned int status = desc->status & ~IRQ_DISABLED; + unsigned int status = desc->status & IRQ_DISABLED; desc->status = status; if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { desc->status = status | IRQ_REPLAY; @@ -282,13 +276,12 @@ unsigned int do_IRQ(int irq, union uml_p * 0 return value means that this irq is already being * handled by some other CPU. (or is disabled) */ - int cpu = smp_processor_id(); irq_desc_t *desc = irq_desc + irq; struct irqaction * action; unsigned int status; irq_enter(); - kstat_cpu(cpu).irqs[irq]++; + kstat_this_cpu.irqs[irq]++; spin_lock(&desc->lock); desc->handler->ack(irq); /* @@ -385,7 +378,7 @@ out: */ int request_irq(unsigned int irq, - void (*handler)(int, void *, struct pt_regs *), + irqreturn_t (*handler)(int, void *, struct pt_regs *), unsigned long irqflags, const char * devname, void *dev_id) @@ -433,15 +426,19 @@ int request_irq(unsigned int irq, EXPORT_SYMBOL(request_irq); int um_request_irq(unsigned int irq, int fd, int type, - void (*handler)(int, void *, struct pt_regs *), + irqreturn_t (*handler)(int, void *, struct pt_regs *), unsigned long irqflags, const char * devname, void *dev_id) { - int retval; + int err; - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if(retval) return(retval); - return(activate_fd(irq, fd, type, dev_id)); + err = request_irq(irq, handler, irqflags, devname, dev_id); + if(err) + return(err); + + if(fd != -1) + err = activate_fd(irq, fd, type, dev_id); + return(err); } /* this was setup_x86_irq but it seems pretty generic */ @@ -474,7 +471,8 @@ int setup_irq(unsigned int irq, struct i */ spin_lock_irqsave(&desc->lock,flags); p = &desc->action; - if ((old = *p) != NULL) { + old = *p; + if (old != NULL) { /* Can't share interrupts unless both agree to */ if (!(old->flags & new->flags & SA_SHIRQ)) { spin_unlock_irqrestore(&desc->lock,flags); @@ -575,7 +573,7 @@ static cpumask_t irq_affinity [NR_IRQS] static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -586,12 +584,14 @@ static int irq_affinity_write_proc (stru unsigned long count, void *data) { int irq = (long) data, full_count = count, err; - cpumask_t new_value, tmp; + cpumask_t new_value; if (!irq_desc[irq].handler->set_affinity) return -EIO; err = cpumask_parse(buffer, count, new_value); + if(err) + return(err); #ifdef CONFIG_SMP /* @@ -613,7 +613,8 @@ static int irq_affinity_write_proc (stru static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); + if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.3-rc2/arch/um/kernel/irq_user.c 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/um/kernel/irq_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -49,7 +48,8 @@ void sigio_handler(int sig, union uml_pt if(smp_sigio_handler()) return; while(1){ - if((n = poll(pollfds, pollfds_num, 0)) < 0){ + n = poll(pollfds, pollfds_num, 0); + if(n < 0){ if(errno == EINTR) continue; printk("sigio_handler : poll returned %d, " "errno = %d\n", n, errno); @@ -366,34 +366,31 @@ void deactivate_fd(int fd, int irqnum) void forward_ipi(int fd, int pid) { - if(fcntl(fd, F_SETOWN, pid) < 0){ - int save_errno = errno; - if(fcntl(fd, F_GETOWN, 0) != pid){ - printk("forward_ipi: F_SETOWN failed, fd = %d, " - "me = %d, target = %d, errno = %d\n", fd, - os_getpid(), pid, save_errno); - } - } + int err; + + err = os_set_owner(fd, pid); + if(err < 0) + printk("forward_ipi: set_owner failed, fd = %d, me = %d, " + "target = %d, err = %d\n", fd, os_getpid(), pid, -err); } void forward_interrupts(int pid) { struct irq_fd *irq; unsigned long flags; + int err; flags = irq_lock(); for(irq=active_fds;irq != NULL;irq = irq->next){ - if(fcntl(irq->fd, F_SETOWN, pid) < 0){ - int save_errno = errno; - if(fcntl(irq->fd, F_GETOWN, 0) != pid){ - /* XXX Just remove the irq rather than - * print out an infinite stream of these - */ - printk("Failed to forward %d to pid %d, " - "errno = %d\n", irq->fd, pid, - save_errno); - } + err = os_set_owner(irq->fd, pid); + if(err < 0){ + /* XXX Just remove the irq rather than + * print out an infinite stream of these + */ + printk("Failed to forward %d to pid %d, err = %d\n", + irq->fd, pid, -err); } + irq->pid = pid; } irq_unlock(flags); --- linux-2.6.3-rc2/arch/um/kernel/ksyms.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/kernel/ksyms.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -34,34 +34,62 @@ EXPORT_SYMBOL(task_size); EXPORT_SYMBOL(flush_tlb_range); EXPORT_SYMBOL(host_task_size); EXPORT_SYMBOL(arch_validate); +EXPORT_SYMBOL(get_kmem_end); -EXPORT_SYMBOL(region_pa); -EXPORT_SYMBOL(region_va); -EXPORT_SYMBOL(phys_mem_map); -EXPORT_SYMBOL(page_mem_map); EXPORT_SYMBOL(page_to_phys); EXPORT_SYMBOL(phys_to_page); EXPORT_SYMBOL(high_physmem); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(um_virt_to_phys); +EXPORT_SYMBOL(__virt_to_page); +EXPORT_SYMBOL(to_phys); +EXPORT_SYMBOL(to_virt); EXPORT_SYMBOL(mode_tt); EXPORT_SYMBOL(handle_page_fault); +#ifdef CONFIG_MODE_TT +EXPORT_SYMBOL(copy_from_user_tt); +EXPORT_SYMBOL(copy_to_user_tt); +#endif + +#ifdef CONFIG_MODE_SKAS +EXPORT_SYMBOL(copy_to_user_skas); +EXPORT_SYMBOL(copy_from_user_skas); +#endif + +EXPORT_SYMBOL(os_stat_fd); +EXPORT_SYMBOL(os_stat_file); +EXPORT_SYMBOL(os_access); +EXPORT_SYMBOL(os_print_error); +EXPORT_SYMBOL(os_get_exec_close); +EXPORT_SYMBOL(os_set_exec_close); EXPORT_SYMBOL(os_getpid); EXPORT_SYMBOL(os_open_file); EXPORT_SYMBOL(os_read_file); EXPORT_SYMBOL(os_write_file); EXPORT_SYMBOL(os_seek_file); +EXPORT_SYMBOL(os_lock_file); EXPORT_SYMBOL(os_pipe); EXPORT_SYMBOL(os_file_type); +EXPORT_SYMBOL(os_file_mode); +EXPORT_SYMBOL(os_file_size); +EXPORT_SYMBOL(os_flush_stdout); EXPORT_SYMBOL(os_close_file); +EXPORT_SYMBOL(os_set_fd_async); +EXPORT_SYMBOL(os_set_fd_block); EXPORT_SYMBOL(helper_wait); EXPORT_SYMBOL(os_shutdown_socket); +EXPORT_SYMBOL(os_create_unix_socket); EXPORT_SYMBOL(os_connect_socket); +EXPORT_SYMBOL(os_accept_connection); +EXPORT_SYMBOL(os_rcv_fd); EXPORT_SYMBOL(run_helper); EXPORT_SYMBOL(start_thread); EXPORT_SYMBOL(dump_thread); +EXPORT_SYMBOL(do_gettimeofday); +EXPORT_SYMBOL(do_settimeofday); + /* This is here because UML expands open to sys_open, not to a system * call instruction. */ @@ -90,3 +118,13 @@ EXPORT_SYMBOL(kunmap_atomic); EXPORT_SYMBOL(kmap_atomic_to_page); #endif +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.3-rc2/arch/um/kernel/Makefile 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/um/kernel/Makefile 2004-02-12 00:40:26.000000000 -0800 @@ -7,11 +7,11 @@ extra-y := vmlinux.lds.s obj-y = checksum.o config.o exec_kern.o exitcode.o frame_kern.o frame.o \ helper.o init_task.o irq.o irq_user.o ksyms.o mem.o mem_user.o \ - process.o process_kern.o ptrace.o reboot.o resource.o sigio_user.o \ - sigio_kern.o signal_kern.o signal_user.o smp.o syscall_kern.o \ - syscall_user.o sysrq.o sys_call_table.o tempfile.o time.o \ - time_kern.o tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o \ - umid.o user_syms.o user_util.o + physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \ + sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \ + syscall_kern.o syscall_user.o sysrq.o sys_call_table.o tempfile.o \ + time.o time_kern.o tlb.o trap_kern.o trap_user.o uaccess_user.o \ + um_arch.o umid.o user_syms.o user_util.o obj-$(CONFIG_BLK_DEV_INITRD) += initrd_kern.o initrd_user.o obj-$(CONFIG_GPROF) += gprof_syms.o @@ -21,6 +21,8 @@ obj-$(CONFIG_TTY_LOG) += tty_log.o obj-$(CONFIG_MODE_TT) += tt/ obj-$(CONFIG_MODE_SKAS) += skas/ +clean-files := config.c + user-objs-$(CONFIG_TTY_LOG) += tty_log.o USER_OBJS := $(filter %_user.o,$(obj-y)) $(user-objs-y) config.o helper.o \ @@ -45,17 +47,13 @@ $(USER_OBJS) : %.o: %.c $(obj)/frame.o: $(src)/frame.c $(CC) $(CFLAGS_$(notdir $@)) -c -o $@ $< -QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; while() { $$_ =~ s/CONFIG/$$config/; print $$_ }' +QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; $$config =~ s/\n/\\n"\n"/g ; while() { $$_ =~ s/CONFIG/$$config/; print $$_ }' $(obj)/config.c : $(src)/config.c.in $(TOPDIR)/.config $(PERL) -e $(QUOTE) < $(src)/config.c.in > $@ $(obj)/config.o : $(obj)/config.c -clean: - rm -f config.c - for dir in $(subdir-y) ; do $(MAKE) -C $$dir clean; done - modules: fastdep: --- linux-2.6.3-rc2/arch/um/kernel/mem.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/mem.c 2004-02-12 00:40:58.000000000 -0800 @@ -1,74 +1,67 @@ /* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ -#include "linux/config.h" -#include "linux/module.h" -#include "linux/types.h" +#include "linux/stddef.h" +#include "linux/kernel.h" #include "linux/mm.h" -#include "linux/fs.h" -#include "linux/init.h" #include "linux/bootmem.h" #include "linux/swap.h" -#include "linux/slab.h" -#include "linux/vmalloc.h" #include "linux/highmem.h" +#include "linux/gfp.h" #include "asm/page.h" -#include "asm/pgtable.h" +#include "asm/fixmap.h" #include "asm/pgalloc.h" -#include "asm/bitops.h" -#include "asm/uaccess.h" -#include "asm/tlb.h" #include "user_util.h" #include "kern_util.h" -#include "mem_user.h" -#include "mem.h" #include "kern.h" -#include "init.h" -#include "os.h" -#include "mode_kern.h" +#include "mem_user.h" #include "uml_uaccess.h" +#include "os.h" + +extern char __binary_start; /* Changed during early boot */ -pgd_t swapper_pg_dir[1024]; -unsigned long high_physmem; -unsigned long vm_start; -unsigned long vm_end; -unsigned long highmem; unsigned long *empty_zero_page = NULL; unsigned long *empty_bad_page = NULL; - -/* Not modified */ -const char bad_pmd_string[] = "Bad pmd in pte_alloc: %08lx\n"; - -extern char __init_begin, __init_end; -extern long physmem_size; - -/* Not changed by UML */ -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - -/* Changed during early boot */ +pgd_t swapper_pg_dir[1024]; +unsigned long highmem; int kmalloc_ok = 0; -#define NREGIONS (phys_region_index(0xffffffff) - phys_region_index(0x0) + 1) -struct mem_region *regions[NREGIONS] = { [ 0 ... NREGIONS - 1 ] = NULL }; -#define REGION_SIZE ((0xffffffff & ~REGION_MASK) + 1) - -/* Changed during early boot */ static unsigned long brk_end; +static unsigned long totalram_pages = 0; + +void unmap_physmem(void) +{ + os_unmap_memory((void *) brk_end, uml_reserved - brk_end); +} static void map_cb(void *unused) { map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); } -void unmap_physmem(void) +#ifdef CONFIG_HIGHMEM +static void setup_highmem(unsigned long highmem_start, + unsigned long highmem_len) { - os_unmap_memory((void *) brk_end, uml_reserved - brk_end); -} + struct page *page; + unsigned long highmem_pfn; + int i; -extern char __binary_start; + highmem_start_page = virt_to_page(highmem_start); + + highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT; + for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){ + page = &mem_map[highmem_pfn + i]; + ClearPageReserved(page); + set_bit(PG_highmem, &page->flags); + atomic_set(&page->count, 1); + __free_page(page); + } +} +#endif void mem_init(void) { @@ -103,50 +96,15 @@ void mem_init(void) totalhigh_pages = highmem >> PAGE_SHIFT; totalram_pages += totalhigh_pages; num_physpages = totalram_pages; - max_mapnr = totalram_pages; max_pfn = totalram_pages; printk(KERN_INFO "Memory: %luk available\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10)); kmalloc_ok = 1; -} - -/* Changed during early boot */ -static unsigned long kmem_top = 0; - -unsigned long get_kmem_end(void) -{ - if(kmem_top == 0) - kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); - return(kmem_top); -} - -void set_kmem_end(unsigned long new) -{ - kmem_top = new; -} #ifdef CONFIG_HIGHMEM -/* Changed during early boot */ -pte_t *kmap_pte; -pgprot_t kmap_prot; - -EXPORT_SYMBOL(kmap_prot); -EXPORT_SYMBOL(kmap_pte); - -#define kmap_get_fixmap_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) - -void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; + setup_highmem(end_iomem, highmem); +#endif } -#endif /* CONFIG_HIGHMEM */ static void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) @@ -178,76 +136,24 @@ static void __init fixrange_init(unsigne } } -int init_maps(struct mem_region *region) -{ - struct page *p, *map; - int i, n, len; - - if(region == &physmem_region){ - region->mem_map = mem_map; - return(0); - } - else if(region->mem_map != NULL) return(0); - - n = region->len >> PAGE_SHIFT; - len = n * sizeof(struct page); - if(kmalloc_ok){ - map = kmalloc(len, GFP_KERNEL); - if(map == NULL) map = vmalloc(len); - } - else map = alloc_bootmem_low_pages(len); - - if(map == NULL) - return(-ENOMEM); - for(i = 0; i < n; i++){ - p = &map[i]; - set_page_count(p, 0); - SetPageReserved(p); - INIT_LIST_HEAD(&p->list); - } - region->mem_map = map; - return(0); -} +#ifdef CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; -DECLARE_MUTEX(regions_sem); +#define kmap_get_fixmap_pte(vaddr) \ + pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) -static int setup_one_range(int fd, char *driver, unsigned long start, - unsigned long pfn, int len, - struct mem_region *region) +void __init kmap_init(void) { - int i; - - down(®ions_sem); - for(i = 0; i < NREGIONS; i++){ - if(regions[i] == NULL) break; - } - if(i == NREGIONS){ - printk("setup_range : no free regions\n"); - i = -1; - goto out; - } - - if(fd == -1) - fd = create_mem_file(len); + unsigned long kmap_vstart; - if(region == NULL){ - region = alloc_bootmem_low_pages(sizeof(*region)); - if(region == NULL) - panic("Failed to allocating mem_region"); - } + /* cache the first kmap pte */ + kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - *region = ((struct mem_region) { .driver = driver, - .start_pfn = pfn, - .start = start, - .len = len, - .fd = fd } ); - regions[i] = region; - out: - up(®ions_sem); - return(i); + kmap_prot = PAGE_KERNEL; } -#ifdef CONFIG_HIGHMEM static void init_highmem(void) { pgd_t *pgd; @@ -268,63 +174,20 @@ static void init_highmem(void) kmap_init(); } - -void setup_highmem(unsigned long len) -{ - struct mem_region *region; - struct page *page, *map; - unsigned long phys; - int i, cur, index; - - phys = physmem_size; - do { - cur = min(len, (unsigned long) REGION_SIZE); - i = setup_one_range(-1, NULL, -1, phys >> PAGE_SHIFT, cur, - NULL); - if(i == -1){ - printk("setup_highmem - setup_one_range failed\n"); - return; - } - region = regions[i]; - index = phys / PAGE_SIZE; - region->mem_map = &mem_map[index]; - - map = region->mem_map; - for(i = 0; i < (cur >> PAGE_SHIFT); i++){ - page = &map[i]; - ClearPageReserved(page); - set_bit(PG_highmem, &page->flags); - atomic_set(&page->count, 1); - __free_page(page); - } - phys += cur; - len -= cur; - } while(len > 0); -} -#endif +#endif /* CONFIG_HIGHMEM */ void paging_init(void) { - struct mem_region *region; - unsigned long zones_size[MAX_NR_ZONES], start, end, vaddr; - int i, index; + unsigned long zones_size[MAX_NR_ZONES], vaddr; + int i; empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); for(i=0;i> PAGE_SHIFT) - - (uml_physmem >> PAGE_SHIFT); + zones_size[0] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT); zones_size[2] = highmem >> PAGE_SHIFT; free_area_init(zones_size); - start = phys_region_index(__pa(uml_physmem)); - end = phys_region_index(__pa(high_physmem - 1)); - for(i = start; i <= end; i++){ - region = regions[i]; - index = (region->start - uml_physmem) / PAGE_SIZE; - region->mem_map = &mem_map[index]; - if(i > start) free_bootmem(__pa(region->start), region->len); - } /* * Fixed mappings, only the page table structure has to be @@ -335,15 +198,33 @@ void paging_init(void) #ifdef CONFIG_HIGHMEM init_highmem(); - setup_highmem(highmem); #endif } -pte_t __bad_page(void) +struct page *arch_validate(struct page *page, int mask, int order) { - clear_page(empty_bad_page); - return pte_mkdirty(mk_pte((struct page *) empty_bad_page, - PAGE_SHARED)); + unsigned long addr, zero = 0; + int i; + + again: + if(page == NULL) return(page); + if(PageHighMem(page)) return(page); + + addr = (unsigned long) page_address(page); + for(i = 0; i < (1 << order); i++){ + current->thread.fault_addr = (void *) addr; + if(__do_copy_to_user((void *) addr, &zero, + sizeof(zero), + ¤t->thread.fault_addr, + ¤t->thread.fault_catcher)){ + if(!(mask & __GFP_WAIT)) return(NULL); + else break; + } + addr += PAGE_SIZE; + } + if(i == (1 << order)) return(page); + page = alloc_pages(mask, order); + goto again; } /* This can't do anything because nothing in the kernel image can be freed @@ -401,395 +282,6 @@ void show_mem(void) printk("%d pages swap cached\n", cached); } -static int __init uml_mem_setup(char *line, int *add) -{ - char *retptr; - physmem_size = memparse(line,&retptr); - return 0; -} -__uml_setup("mem=", uml_mem_setup, -"mem=\n" -" This controls how much \"physical\" memory the kernel allocates\n" -" for the system. The size is specified as a number followed by\n" -" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" -" This is not related to the amount of memory in the physical\n" -" machine. It can be more, and the excess, if it's ever used, will\n" -" just be swapped out.\n Example: mem=64M\n\n" -); - -struct page *arch_validate(struct page *page, int mask, int order) -{ - unsigned long addr, zero = 0; - int i; - - again: - if(page == NULL) return(page); - if(PageHighMem(page)) return(page); - - addr = (unsigned long) page_address(page); - for(i = 0; i < (1 << order); i++){ - current->thread.fault_addr = (void *) addr; - if(__do_copy_to_user((void *) addr, &zero, - sizeof(zero), - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher)){ - if(!(mask & __GFP_WAIT)) return(NULL); - else break; - } - addr += PAGE_SIZE; - } - if(i == (1 << order)) return(page); - page = alloc_pages(mask, order); - goto again; -} - -DECLARE_MUTEX(vm_reserved_sem); -static struct list_head vm_reserved = LIST_HEAD_INIT(vm_reserved); - -/* Static structures, linked in to the list in early boot */ -static struct vm_reserved head = { - .list = LIST_HEAD_INIT(head.list), - .start = 0, - .end = 0xffffffff -}; - -static struct vm_reserved tail = { - .list = LIST_HEAD_INIT(tail.list), - .start = 0, - .end = 0xffffffff -}; - -void set_usable_vm(unsigned long start, unsigned long end) -{ - list_add(&head.list, &vm_reserved); - list_add(&tail.list, &head.list); - head.end = start; - tail.start = end; -} - -int reserve_vm(unsigned long start, unsigned long end, void *e) - -{ - struct vm_reserved *entry = e, *reserved, *prev; - struct list_head *ele; - int err; - - down(&vm_reserved_sem); - list_for_each(ele, &vm_reserved){ - reserved = list_entry(ele, struct vm_reserved, list); - if(reserved->start >= end) goto found; - } - panic("Reserved vm out of range"); - found: - prev = list_entry(ele->prev, struct vm_reserved, list); - if(prev->end > start) - panic("Can't reserve vm"); - if(entry == NULL) - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if(entry == NULL){ - printk("reserve_vm : Failed to allocate entry\n"); - err = -ENOMEM; - goto out; - } - *entry = ((struct vm_reserved) - { .list = LIST_HEAD_INIT(entry->list), - .start = start, - .end = end }); - list_add(&entry->list, &prev->list); - err = 0; - out: - up(&vm_reserved_sem); - return(0); -} - -unsigned long get_vm(unsigned long len) -{ - struct vm_reserved *this, *next; - struct list_head *ele; - unsigned long start; - int err; - - down(&vm_reserved_sem); - list_for_each(ele, &vm_reserved){ - this = list_entry(ele, struct vm_reserved, list); - next = list_entry(ele->next, struct vm_reserved, list); - if((this->start < next->start) && - (this->end + len + PAGE_SIZE <= next->start)) - goto found; - } - up(&vm_reserved_sem); - return(0); - found: - up(&vm_reserved_sem); - start = (unsigned long) UML_ROUND_UP(this->end) + PAGE_SIZE; - err = reserve_vm(start, start + len, NULL); - if(err) return(0); - return(start); -} - -int nregions(void) -{ - return(NREGIONS); -} - -void setup_range(int fd, char *driver, unsigned long start, unsigned long pfn, - unsigned long len, int need_vm, struct mem_region *region, - void *reserved) -{ - int i, cur; - - do { - cur = min(len, (unsigned long) REGION_SIZE); - i = setup_one_range(fd, driver, start, pfn, cur, region); - region = regions[i]; - if(need_vm && setup_region(region, reserved)){ - kfree(region); - regions[i] = NULL; - return; - } - start += cur; - if(pfn != -1) pfn += cur; - len -= cur; - } while(len > 0); -} - -struct iomem { - char *name; - int fd; - unsigned long size; -}; - -/* iomem regions can only be added on the command line at the moment. - * Locking will be needed when they can be added via mconsole. - */ - -struct iomem iomem_regions[NREGIONS] = { [ 0 ... NREGIONS - 1 ] = - { .name = NULL, - .fd = -1, - .size = 0 } }; - -int num_iomem_regions = 0; - -void add_iomem(char *name, int fd, unsigned long size) -{ - if(num_iomem_regions == sizeof(iomem_regions)/sizeof(iomem_regions[0])) - return; - size = (size + PAGE_SIZE - 1) & PAGE_MASK; - iomem_regions[num_iomem_regions++] = - ((struct iomem) { .name = name, - .fd = fd, - .size = size } ); -} - -int setup_iomem(void) -{ - struct iomem *iomem; - int i; - - for(i = 0; i < num_iomem_regions; i++){ - iomem = &iomem_regions[i]; - setup_range(iomem->fd, iomem->name, -1, -1, iomem->size, 1, - NULL, NULL); - } - return(0); -} - -__initcall(setup_iomem); - -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) - -/* Changed during early boot */ -static struct mem_region physmem_region; -static struct vm_reserved physmem_reserved; - -void setup_physmem(unsigned long start, unsigned long reserve_end, - unsigned long len) -{ - struct mem_region *region = &physmem_region; - struct vm_reserved *reserved = &physmem_reserved; - unsigned long cur, pfn = 0; - int do_free = 1, bootmap_size; - - do { - cur = min(len, (unsigned long) REGION_SIZE); - if(region == NULL) - region = alloc_bootmem_low_pages(sizeof(*region)); - if(reserved == NULL) - reserved = alloc_bootmem_low_pages(sizeof(*reserved)); - if((region == NULL) || (reserved == NULL)) - panic("Couldn't allocate physmem region or vm " - "reservation\n"); - setup_range(-1, NULL, start, pfn, cur, 1, region, reserved); - - if(do_free){ - unsigned long reserve = reserve_end - start; - int pfn = PFN_UP(__pa(reserve_end)); - int delta = (len - reserve) >> PAGE_SHIFT; - - bootmap_size = init_bootmem(pfn, pfn + delta); - free_bootmem(__pa(reserve_end) + bootmap_size, - cur - bootmap_size - reserve); - do_free = 0; - } - start += cur; - pfn += cur >> PAGE_SHIFT; - len -= cur; - region = NULL; - reserved = NULL; - } while(len > 0); -} - -struct mem_region *phys_region(unsigned long phys) -{ - unsigned int n = phys_region_index(phys); - - if(regions[n] == NULL) - panic("Physical address in uninitialized region"); - return(regions[n]); -} - -unsigned long phys_offset(unsigned long phys) -{ - return(phys_addr(phys)); -} - -struct page *phys_mem_map(unsigned long phys) -{ - return((struct page *) phys_region(phys)->mem_map); -} - -struct page *pte_mem_map(pte_t pte) -{ - return(phys_mem_map(pte_val(pte))); -} - -struct mem_region *page_region(struct page *page, int *index_out) -{ - int i; - struct mem_region *region; - struct page *map; - - for(i = 0; i < NREGIONS; i++){ - region = regions[i]; - if(region == NULL) continue; - map = region->mem_map; - if((page >= map) && (page < &map[region->len >> PAGE_SHIFT])){ - if(index_out != NULL) *index_out = i; - return(region); - } - } - panic("No region found for page"); - return(NULL); -} - -unsigned long page_to_pfn(struct page *page) -{ - struct mem_region *region = page_region(page, NULL); - - return(region->start_pfn + (page - (struct page *) region->mem_map)); -} - -struct mem_region *pfn_to_region(unsigned long pfn, int *index_out) -{ - struct mem_region *region; - int i; - - for(i = 0; i < NREGIONS; i++){ - region = regions[i]; - if(region == NULL) - continue; - - if((region->start_pfn <= pfn) && - (region->start_pfn + (region->len >> PAGE_SHIFT) > pfn)){ - if(index_out != NULL) - *index_out = i; - return(region); - } - } - return(NULL); -} - -struct page *pfn_to_page(unsigned long pfn) -{ - struct mem_region *region = pfn_to_region(pfn, NULL); - struct page *mem_map = (struct page *) region->mem_map; - - return(&mem_map[pfn - region->start_pfn]); -} - -unsigned long phys_to_pfn(unsigned long p) -{ - struct mem_region *region = regions[phys_region_index(p)]; - - return(region->start_pfn + (phys_addr(p) >> PAGE_SHIFT)); -} - -unsigned long pfn_to_phys(unsigned long pfn) -{ - int n; - struct mem_region *region = pfn_to_region(pfn, &n); - - return(mk_phys((pfn - region->start_pfn) << PAGE_SHIFT, n)); -} - -struct page *page_mem_map(struct page *page) -{ - return((struct page *) page_region(page, NULL)->mem_map); -} - -extern unsigned long region_pa(void *virt) -{ - struct mem_region *region; - unsigned long addr = (unsigned long) virt; - int i; - - for(i = 0; i < NREGIONS; i++){ - region = regions[i]; - if(region == NULL) continue; - if((region->start <= addr) && - (addr <= region->start + region->len)) - return(mk_phys(addr - region->start, i)); - } - panic("region_pa : no region for virtual address"); - return(0); -} - -extern void *region_va(unsigned long phys) -{ - return((void *) (phys_region(phys)->start + phys_addr(phys))); -} - -unsigned long page_to_phys(struct page *page) -{ - int n; - struct mem_region *region = page_region(page, &n); - struct page *map = region->mem_map; - return(mk_phys((page - map) << PAGE_SHIFT, n)); -} - -struct page *phys_to_page(unsigned long phys) -{ - struct page *mem_map; - - mem_map = phys_mem_map(phys); - return(mem_map + (phys_offset(phys) >> PAGE_SHIFT)); -} - -static int setup_mem_maps(void) -{ - struct mem_region *region; - int i; - - for(i = 0; i < NREGIONS; i++){ - region = regions[i]; - if((region != NULL) && (region->fd > 0)) init_maps(region); - } - return(0); -} - -__initcall(setup_mem_maps); - /* * Allocate and free page tables. */ --- linux-2.6.3-rc2/arch/um/kernel/mem_user.c 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/um/kernel/mem_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -34,10 +34,9 @@ #include #include #include -#include #include #include -#include +#include #include #include #include "kern_util.h" @@ -47,105 +46,145 @@ #include "init.h" #include "os.h" #include "tempfile.h" +#include "kern_constants.h" extern struct mem_region physmem_region; #define TEMPNAME_TEMPLATE "vm_file-XXXXXX" -int create_mem_file(unsigned long len) +static int create_tmp_file(unsigned long len) { - int fd; + int fd, err; char zero; fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1); - if (fchmod(fd, 0777) < 0){ - perror("fchmod"); + if(fd < 0) { + os_print_error(fd, "make_tempfile"); + exit(1); + } + + err = os_mode_fd(fd, 0777); + if(err < 0){ + os_print_error(err, "os_mode_fd"); exit(1); } - if(os_seek_file(fd, len) < 0){ - perror("lseek"); + err = os_seek_file(fd, len); + if(err < 0){ + os_print_error(err, "os_seek_file"); exit(1); } zero = 0; - if(write(fd, &zero, 1) != 1){ - perror("write"); + err = os_write_file(fd, &zero, 1); + if(err != 1){ + os_print_error(err, "os_write_file"); exit(1); } - if(fcntl(fd, F_SETFD, 1) != 0) - perror("Setting FD_CLOEXEC failed"); + return(fd); } -int setup_region(struct mem_region *region, void *entry) +static int have_devanon = 0; + +void check_devanon(void) +{ + int fd; + + printk("Checking for /dev/anon on the host..."); + fd = open("/dev/anon", O_RDWR); + if(fd < 0){ + printk("Not available (open failed with errno %d)\n", errno); + return; + } + + printk("OK\n"); + have_devanon = 1; +} + +static int create_anon_file(unsigned long len) { - void *loc, *start; - char *driver; - int err, offset; - - if(region->start != -1){ - err = reserve_vm(region->start, - region->start + region->len, entry); - if(err){ - printk("setup_region : failed to reserve " - "0x%x - 0x%x for driver '%s'\n", - region->start, - region->start + region->len, - region->driver); - return(-1); - } - } - else region->start = get_vm(region->len); - if(region->start == 0){ - if(region->driver == NULL) driver = "physmem"; - else driver = region->driver; - printk("setup_region : failed to find vm for " - "driver '%s' (length %d)\n", driver, region->len); - return(-1); - } - if(region->start == uml_physmem){ - start = (void *) uml_reserved; - offset = uml_reserved - uml_physmem; - } - else { - start = (void *) region->start; - offset = 0; - } - - loc = mmap(start, region->len - offset, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, region->fd, offset); - if(loc != start){ - perror("Mapping memory"); + void *addr; + int fd; + + fd = open("/dev/anon", O_RDWR); + if(fd < 0) { + os_print_error(fd, "opening /dev/anon"); exit(1); } - return(0); + + addr = mmap(NULL, len, PROT_READ | PROT_WRITE , MAP_PRIVATE, fd, 0); + if(addr == MAP_FAILED){ + os_print_error((int) addr, "mapping physmem file"); + exit(1); + } + munmap(addr, len); + + return(fd); +} + +int create_mem_file(unsigned long len) +{ + int err, fd; + + if(have_devanon) + fd = create_anon_file(len); + else fd = create_tmp_file(len); + + err = os_set_exec_close(fd, 1); + if(err < 0) + os_print_error(err, "exec_close"); + return(fd); } +struct iomem_region *iomem_regions = NULL; +int iomem_size = 0; + static int __init parse_iomem(char *str, int *add) { - struct stat buf; + struct iomem_region *new; + struct uml_stat buf; char *file, *driver; - int fd; + int fd, err; driver = str; file = strchr(str,','); if(file == NULL){ - printk("parse_iomem : failed to parse iomem\n"); - return(1); + printf("parse_iomem : failed to parse iomem\n"); + goto out; } *file = '\0'; file++; fd = os_open_file(file, of_rdwr(OPENFLAGS()), 0); if(fd < 0){ - printk("parse_iomem - Couldn't open io file, errno = %d\n", - errno); - return(1); - } - if(fstat(fd, &buf) < 0) { - printk("parse_iomem - cannot fstat file, errno = %d\n", errno); - return(1); + os_print_error(fd, "parse_iomem - Couldn't open io file"); + goto out; } - add_iomem(driver, fd, buf.st_size); + + err = os_stat_fd(fd, &buf); + if(err < 0){ + os_print_error(err, "parse_iomem - cannot stat_fd file"); + goto out_close; + } + + new = malloc(sizeof(*new)); + if(new == NULL){ + perror("Couldn't allocate iomem_region struct"); + goto out_close; + } + + *new = ((struct iomem_region) { .next = iomem_regions, + .driver = driver, + .fd = fd, + .size = buf.ust_size, + .phys = 0, + .virt = 0 }); + iomem_regions = new; + iomem_size += new->size + UM_KERN_PAGE_SIZE; + return(0); + out_close: + os_close_file(fd); + out: + return(1); } __uml_setup("iomem=", parse_iomem, @@ -153,73 +192,20 @@ __uml_setup("iomem=", parse_iomem, " Configure as an IO memory region named .\n\n" ); -#ifdef notdef -int logging = 0; -int logging_fd = -1; - -int logging_line = 0; -char logging_buf[256]; - -void log(char *fmt, ...) -{ - va_list ap; - struct timeval tv; - struct openflags flags; - - if(logging == 0) return; - if(logging_fd < 0){ - flags = of_create(of_trunc(of_rdrw(OPENFLAGS()))); - logging_fd = os_open_file("log", flags, 0644); - } - gettimeofday(&tv, NULL); - sprintf(logging_buf, "%d\t %u.%u ", logging_line++, tv.tv_sec, - tv.tv_usec); - va_start(ap, fmt); - vsprintf(&logging_buf[strlen(logging_buf)], fmt, ap); - va_end(ap); - write(logging_fd, logging_buf, strlen(logging_buf)); -} -#endif - -int map_memory(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x) -{ - struct mem_region *region = phys_region(phys); - - return(os_map_memory((void *) virt, region->fd, phys_offset(phys), len, - r, w, x)); -} - int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x, int must_succeed) { - if(os_protect_memory((void *) addr, len, r, w, x) < 0){ + int err; + + err = os_protect_memory((void *) addr, len, r, w, x); + if(err < 0){ if(must_succeed) - panic("protect failed, errno = %d", errno); - else return(-errno); + panic("protect failed, err = %d", -err); + else return(err); } return(0); } -unsigned long find_iomem(char *driver, unsigned long *len_out) -{ - struct mem_region *region; - int i, n; - - n = nregions(); - for(i = 0; i < n; i++){ - region = regions[i]; - if(region == NULL) continue; - if((region->driver != NULL) && - !strcmp(region->driver, driver)){ - *len_out = region->len; - return(region->start); - } - } - *len_out = 0; - return 0; -} - /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/kernel/physmem.c 2004-02-12 00:40:26.000000000 -0800 @@ -0,0 +1,349 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/mm.h" +#include "linux/ghash.h" +#include "linux/slab.h" +#include "linux/vmalloc.h" +#include "linux/bootmem.h" +#include "asm/types.h" +#include "asm/pgtable.h" +#include "kern_util.h" +#include "user_util.h" +#include "mode_kern.h" +#include "mem.h" +#include "mem_user.h" +#include "os.h" +#include "kern.h" +#include "init.h" + +#define PHYS_HASHSIZE (8192) + +struct phys_desc; + +DEF_HASH_STRUCTS(virtmem, PHYS_HASHSIZE, struct phys_desc); + +struct phys_desc { + struct virtmem_ptrs virt_ptrs; + int fd; + __u64 offset; + void *virt; + unsigned long phys; +}; + +struct virtmem_table virtmem_hash; + +static int virt_cmp(void *virt1, void *virt2) +{ + return(virt1 != virt2); +} + +static int virt_hash(void *virt) +{ + unsigned long addr = ((unsigned long) virt) >> PAGE_SHIFT; + return(addr % PHYS_HASHSIZE); +} + +DEF_HASH(static, virtmem, struct phys_desc, virt_ptrs, void *, virt, virt_cmp, + virt_hash); + +int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w) +{ + struct phys_desc *desc; + unsigned long phys; + int err; + + virt = (void *) ((unsigned long) virt & PAGE_MASK); + err = os_map_memory(virt, fd, offset, PAGE_SIZE, 1, w, 0); + if(err) + goto out; + + phys = __pa(virt); + if(find_virtmem_hash(&virtmem_hash, virt) != NULL) + panic("Address 0x%p is already substituted\n", virt); + + err = -ENOMEM; + desc = kmalloc(sizeof(*desc), GFP_ATOMIC); + if(desc == NULL) + goto out; + + *desc = ((struct phys_desc) { .virt_ptrs = { NULL, NULL }, + .fd = fd, + .offset = offset, + .virt = virt, + .phys = __pa(virt) }); + insert_virtmem_hash(&virtmem_hash, desc); + err = 0; + out: + return(err); +} + +static int physmem_fd = -1; + +int physmem_remove_mapping(void *virt) +{ + struct phys_desc *desc; + int err; + + virt = (void *) ((unsigned long) virt & PAGE_MASK); + desc = find_virtmem_hash(&virtmem_hash, virt); + if(desc == NULL) + return(0); + + remove_virtmem_hash(&virtmem_hash, desc); + kfree(desc); + + err = os_map_memory(virt, physmem_fd, __pa(virt), PAGE_SIZE, 1, 1, 0); + if(err) + panic("Failed to unmap block device page from physical memory, " + "errno = %d", -err); + return(1); +} + +void arch_free_page(struct page *page, int order) +{ + void *virt; + int i; + + for(i = 0; i < 1 << order; i++){ + virt = __va(page_to_phys(page + i)); + physmem_remove_mapping(virt); + } +} + +int is_remapped(void *virt) +{ + return(find_virtmem_hash(&virtmem_hash, virt) != NULL); +} + +/* Changed during early boot */ +unsigned long high_physmem; + +extern unsigned long physmem_size; + +void *to_virt(unsigned long phys) +{ + return((void *) uml_physmem + phys); +} + +unsigned long to_phys(void *virt) +{ + return(((unsigned long) virt) - uml_physmem); +} + +int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) +{ + struct page *p, *map; + unsigned long phys_len, phys_pages, highmem_len, highmem_pages; + unsigned long iomem_len, iomem_pages, total_len, total_pages; + int i; + + phys_pages = physmem >> PAGE_SHIFT; + phys_len = phys_pages * sizeof(struct page); + + iomem_pages = iomem >> PAGE_SHIFT; + iomem_len = iomem_pages * sizeof(struct page); + + highmem_pages = highmem >> PAGE_SHIFT; + highmem_len = highmem_pages * sizeof(struct page); + + total_pages = phys_pages + iomem_pages + highmem_pages; + total_len = phys_len + iomem_pages + highmem_len; + + if(kmalloc_ok){ + map = kmalloc(total_len, GFP_KERNEL); + if(map == NULL) + map = vmalloc(total_len); + } + else map = alloc_bootmem_low_pages(total_len); + + if(map == NULL) + return(-ENOMEM); + + for(i = 0; i < total_pages; i++){ + p = &map[i]; + set_page_count(p, 0); + SetPageReserved(p); + INIT_LIST_HEAD(&p->list); + } + + mem_map = map; + max_mapnr = total_pages; + return(0); +} + +struct page *phys_to_page(const unsigned long phys) +{ + return(&mem_map[phys >> PAGE_SHIFT]); +} + +struct page *__virt_to_page(const unsigned long virt) +{ + return(&mem_map[__pa(virt) >> PAGE_SHIFT]); +} + +unsigned long page_to_phys(struct page *page) +{ + return((page - mem_map) << PAGE_SHIFT); +} + +pte_t mk_pte(struct page *page, pgprot_t pgprot) +{ + pte_t pte; + + pte_val(pte) = page_to_phys(page) + pgprot_val(pgprot); + if(pte_present(pte)) pte_mknewprot(pte_mknewpage(pte)); + return(pte); +} + +/* Changed during early boot */ +static unsigned long kmem_top = 0; + +unsigned long get_kmem_end(void) +{ + if(kmem_top == 0) + kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); + return(kmem_top); +} + +void map_memory(unsigned long virt, unsigned long phys, unsigned long len, + int r, int w, int x) +{ + __u64 offset; + int fd, err; + + fd = phys_mapping(phys, &offset); + err = os_map_memory((void *) virt, fd, offset, len, r, w, x); + if(err) + panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, " + "err = %d\n", virt, fd, offset, len, r, w, x, err); +} + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) + +void setup_physmem(unsigned long start, unsigned long reserve_end, + unsigned long len, unsigned long highmem) +{ + unsigned long reserve = reserve_end - start; + int pfn = PFN_UP(__pa(reserve_end)); + int delta = (len - reserve) >> PAGE_SHIFT; + int err, offset, bootmap_size; + + physmem_fd = create_mem_file(len + highmem); + + offset = uml_reserved - uml_physmem; + err = os_map_memory((void *) uml_reserved, physmem_fd, offset, + len - offset, 1, 1, 0); + if(err < 0){ + os_print_error(err, "Mapping memory"); + exit(1); + } + + bootmap_size = init_bootmem(pfn, pfn + delta); + free_bootmem(__pa(reserve_end) + bootmap_size, + len - bootmap_size - reserve); +} + +int phys_mapping(unsigned long phys, __u64 *offset_out) +{ + struct phys_desc *desc = find_virtmem_hash(&virtmem_hash, + __va(phys & PAGE_MASK)); + int fd = -1; + + if(desc != NULL){ + fd = desc->fd; + *offset_out = desc->offset; + } + else if(phys < physmem_size){ + fd = physmem_fd; + *offset_out = phys; + } + else if(phys < __pa(end_iomem)){ + struct iomem_region *region = iomem_regions; + + while(region != NULL){ + if((phys >= region->phys) && + (phys < region->phys + region->size)){ + fd = region->fd; + *offset_out = phys - region->phys; + break; + } + region = region->next; + } + } + else if(phys < __pa(end_iomem) + highmem){ + fd = physmem_fd; + *offset_out = phys - iomem_size; + } + + return(fd); +} + +static int __init uml_mem_setup(char *line, int *add) +{ + char *retptr; + physmem_size = memparse(line,&retptr); + return 0; +} +__uml_setup("mem=", uml_mem_setup, +"mem=\n" +" This controls how much \"physical\" memory the kernel allocates\n" +" for the system. The size is specified as a number followed by\n" +" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" +" This is not related to the amount of memory in the host. It can\n" +" be more, and the excess, if it's ever used, will just be swapped out.\n" +" Example: mem=64M\n\n" +); + +unsigned long find_iomem(char *driver, unsigned long *len_out) +{ + struct iomem_region *region = iomem_regions; + + while(region != NULL){ + if(!strcmp(region->driver, driver)){ + *len_out = region->size; + return(region->virt); + } + } + + return(0); +} + +int setup_iomem(void) +{ + struct iomem_region *region = iomem_regions; + unsigned long iomem_start = high_physmem + PAGE_SIZE; + int err; + + while(region != NULL){ + err = os_map_memory((void *) iomem_start, region->fd, 0, + region->size, 1, 1, 0); + if(err) + printk("Mapping iomem region for driver '%s' failed, " + "errno = %d\n", region->driver, -err); + else { + region->virt = iomem_start; + region->phys = __pa(region->virt); + } + + iomem_start += region->size + PAGE_SIZE; + region = region->next; + } + + return(0); +} + +__initcall(setup_iomem); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.3-rc2/arch/um/kernel/process.c 2003-06-14 12:18:32.000000000 -0700 +++ 25/arch/um/kernel/process.c 2004-02-12 00:40:26.000000000 -0800 @@ -9,12 +9,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include #include @@ -58,7 +56,11 @@ void init_new_thread_signals(int altstac { int flags = altstack ? SA_ONSTACK : 0; - set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, + /* NODEFER is set here because SEGV isn't turned back on when the + * handler is ready to receive signals. This causes any segfault + * during a copy_user to kill the process because the fault is blocked. + */ + set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags | SA_NODEFER, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); @@ -72,7 +74,6 @@ void init_new_thread_signals(int altstac SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); set_handler(SIGUSR2, (__sighandler_t) sig_handler, SA_NOMASK | flags, -1); - (void) CHOOSE_MODE(signal(SIGCHLD, SIG_IGN), (void *) 0); signal(SIGHUP, SIG_IGN); init_irq_signals(altstack); @@ -123,11 +124,12 @@ int start_fork_tramp(void *thread_arg, u /* Start the process and wait for it to kill itself */ new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg); if(new_pid < 0) return(-errno); - while((err = waitpid(new_pid, &status, 0) < 0) && (errno == EINTR)) ; + while(((err = waitpid(new_pid, &status, 0)) < 0) && (errno == EINTR)) ; if(err < 0) panic("Waiting for outer trampoline failed - errno = %d", errno); if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL)) - panic("outer trampoline didn't exit with SIGKILL"); + panic("outer trampoline didn't exit with SIGKILL, " + "status = %d", status); return(arg.pid); } @@ -138,7 +140,7 @@ void suspend_new_thread(int fd) os_stop_process(os_getpid()); - if(read(fd, &c, sizeof(c)) != sizeof(c)) + if(os_read_file(fd, &c, sizeof(c)) != sizeof(c)) panic("read failed in suspend_new_thread"); } @@ -233,7 +235,7 @@ int run_kernel_thread(int (*fn)(void *), int n; *jmp_ptr = &buf; - n = setjmp(buf); + n = sigsetjmp(buf, 1); if(n != 0) return(n); (*fn)(arg); @@ -273,7 +275,7 @@ int can_do_skas(void) stop_ptraced_child(pid, stack, 1); printf("Checking for /proc/mm..."); - if(access("/proc/mm", W_OK)){ + if(os_access("/proc/mm", OS_ACC_W_OK) < 0){ printf("not found\n"); ret = 0; } --- linux-2.6.3-rc2/arch/um/kernel/process_kern.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/process_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -16,6 +16,7 @@ #include "linux/module.h" #include "linux/init.h" #include "linux/capability.h" +#include "linux/spinlock.h" #include "asm/unistd.h" #include "asm/mman.h" #include "asm/segment.h" @@ -23,7 +24,6 @@ #include "asm/pgtable.h" #include "asm/processor.h" #include "asm/tlbflush.h" -#include "asm/spinlock.h" #include "asm/uaccess.h" #include "asm/user.h" #include "user_util.h" @@ -52,17 +52,12 @@ struct cpu_task cpu_tasks[NR_CPUS] = { [ struct task_struct *get_task(int pid, int require) { - struct task_struct *task, *ret; + struct task_struct *ret; - ret = NULL; read_lock(&tasklist_lock); - for_each_process(task){ - if(task->pid == pid){ - ret = task; - break; - } - } + ret = find_task_by_pid(pid); read_unlock(&tasklist_lock); + if(require && (ret == NULL)) panic("get_task couldn't find a task\n"); return(ret); } @@ -95,7 +90,8 @@ unsigned long alloc_stack(int order, int int flags = GFP_KERNEL; if(atomic) flags |= GFP_ATOMIC; - if((page = __get_free_pages(flags, order)) == 0) + page = __get_free_pages(flags, order); + if(page == 0) return(0); stack_protections(page); return(page); @@ -103,13 +99,15 @@ unsigned long alloc_stack(int order, int int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { - struct task_struct *p; + int pid; current->thread.request.u.thread.proc = fn; current->thread.request.u.thread.arg = arg; - p = do_fork(CLONE_VM | flags, 0, NULL, 0, NULL, NULL); - if(IS_ERR(p)) panic("do_fork failed in kernel_thread"); - return(p->pid); + pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, NULL, 0, NULL, + NULL); + if(pid < 0) + panic("do_fork failed in kernel_thread, errno = %d", pid); + return(pid); } void switch_mm(struct mm_struct *prev, struct mm_struct *next, @@ -129,7 +127,7 @@ void set_current(void *t) { external_pid(task), task }); } -void *switch_to(void *prev, void *next, void *last) +void *_switch_to(void *prev, void *next, void *last) { return(CHOOSE_MODE(switch_to_tt(prev, next), switch_to_skas(prev, next))); @@ -149,7 +147,7 @@ void release_thread(struct task_struct * void exit_thread(void) { CHOOSE_MODE(exit_thread_tt(), exit_thread_skas()); - unprotect_stack((unsigned long) current->thread_info); + unprotect_stack((unsigned long) current_thread); } void *get_current(void) @@ -157,6 +155,10 @@ void *get_current(void) return(current); } +void prepare_to_copy(struct task_struct *tsk) +{ +} + int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, unsigned long stack_top, struct task_struct * p, struct pt_regs *regs) @@ -190,7 +192,7 @@ int current_pid(void) void default_idle(void) { - idle_timer(); + uml_idle_timer(); atomic_inc(&init_mm.mm_count); current->mm = &init_mm; @@ -367,10 +369,15 @@ int clear_user_proc(void *buf, int size) return(clear_user(buf, size)); } +int strlen_user_proc(char *str) +{ + return(strlen_user(str)); +} + int smp_sigio_handler(void) { #ifdef CONFIG_SMP - int cpu = current->thread_info->cpu; + int cpu = current_thread->cpu; IPI_handler(cpu); if(cpu != 0) return(1); @@ -385,7 +392,7 @@ int um_in_interrupt(void) int cpu(void) { - return(current->thread_info->cpu); + return(current_thread->cpu); } /* --- linux-2.6.3-rc2/arch/um/kernel/ptrace.c 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/um/kernel/ptrace.c 2004-02-12 00:40:26.000000000 -0800 @@ -311,11 +311,8 @@ void syscall_trace(void) /* the 0x80 provides a way for the tracing parent to distinguish between a syscall stop and SIGTRAP delivery */ - current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0); - current->state = TASK_STOPPED; - notify_parent(current, SIGCHLD); - schedule(); + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); /* * this isn't the same as continuing with a signal, but it will do --- linux-2.6.3-rc2/arch/um/kernel/reboot.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/reboot.c 2004-02-12 00:40:26.000000000 -0800 @@ -15,6 +15,7 @@ #ifdef CONFIG_SMP static void kill_idlers(int me) { +#ifdef CONFIG_MODE_TT struct task_struct *p; int i; @@ -23,6 +24,7 @@ static void kill_idlers(int me) if((p != NULL) && (p->thread.mode.tt.extern_pid != me)) os_kill_process(p->thread.mode.tt.extern_pid, 0); } +#endif } #endif --- linux-2.6.3-rc2/arch/um/kernel/sigio_kern.c 2003-06-14 12:18:04.000000000 -0700 +++ 25/arch/um/kernel/sigio_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -6,18 +6,21 @@ #include "linux/kernel.h" #include "linux/list.h" #include "linux/slab.h" -#include "asm/irq.h" +#include "linux/signal.h" +#include "linux/interrupt.h" #include "init.h" #include "sigio.h" #include "irq_user.h" +#include "irq_kern.h" /* Protected by sigio_lock() called from write_sigio_workaround */ static int sigio_irq_fd = -1; -void sigio_interrupt(int irq, void *data, struct pt_regs *unused) +irqreturn_t sigio_interrupt(int irq, void *data, struct pt_regs *unused) { read_sigio_fd(sigio_irq_fd); reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ); + return(IRQ_HANDLED); } int write_sigio_irq(int fd) --- linux-2.6.3-rc2/arch/um/kernel/sigio_user.c 2003-06-14 12:18:35.000000000 -0700 +++ 25/arch/um/kernel/sigio_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -26,7 +25,7 @@ int pty_output_sigio = 0; int pty_close_sigio = 0; /* Used as a flag during SIGIO testing early in boot */ -static int got_sigio = 0; +static volatile int got_sigio = 0; void __init handler(int sig) { @@ -45,7 +44,7 @@ static void openpty_cb(void *arg) info->err = 0; if(openpty(&info->master, &info->slave, NULL, NULL, NULL)) - info->err = errno; + info->err = -errno; } void __init check_one_sigio(void (*proc)(int, int)) @@ -53,11 +52,11 @@ void __init check_one_sigio(void (*proc) struct sigaction old, new; struct termios tt; struct openpty_arg pty = { .master = -1, .slave = -1 }; - int master, slave, flags; + int master, slave, err; initial_thread_cb(openpty_cb, &pty); if(pty.err){ - printk("openpty failed, errno = %d\n", pty.err); + printk("openpty failed, errno = %d\n", -pty.err); return; } @@ -69,23 +68,16 @@ void __init check_one_sigio(void (*proc) return; } + /* XXX These can fail with EINTR */ if(tcgetattr(master, &tt) < 0) panic("check_sigio : tcgetattr failed, errno = %d\n", errno); cfmakeraw(&tt); if(tcsetattr(master, TCSADRAIN, &tt) < 0) panic("check_sigio : tcsetattr failed, errno = %d\n", errno); - if((flags = fcntl(master, F_GETFL)) < 0) - panic("tty_fds : fcntl F_GETFL failed, errno = %d\n", errno); - - if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || - (fcntl(master, F_SETOWN, os_getpid()) < 0)) - panic("check_sigio : fcntl F_SETFL or F_SETOWN failed, " - "errno = %d\n", errno); - - if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)) - panic("check_sigio : fcntl F_SETFL failed, errno = %d\n", - errno); + err = os_sigio_async(master, slave); + if(err < 0) + panic("tty_fds : sigio_async failed, err = %d\n", -err); if(sigaction(SIGIO, NULL, &old) < 0) panic("check_sigio : sigaction 1 failed, errno = %d\n", errno); @@ -97,8 +89,8 @@ void __init check_one_sigio(void (*proc) got_sigio = 0; (*proc)(master, slave); - close(master); - close(slave); + os_close_file(master); + os_close_file(slave); if(sigaction(SIGIO, &old, NULL) < 0) panic("check_sigio : sigaction 3 failed, errno = %d\n", errno); @@ -112,25 +104,25 @@ static void tty_output(int master, int s printk("Checking that host ptys support output SIGIO..."); memset(buf, 0, sizeof(buf)); - while(write(master, buf, sizeof(buf)) > 0) ; + + while(os_write_file(master, buf, sizeof(buf)) > 0) ; if(errno != EAGAIN) panic("check_sigio : write failed, errno = %d\n", errno); - - while(((n = read(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; + while(((n = os_read_file(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; if(got_sigio){ printk("Yes\n"); pty_output_sigio = 1; } - else if(errno == EAGAIN) printk("No, enabling workaround\n"); - else panic("check_sigio : read failed, errno = %d\n", errno); + else if(n == -EAGAIN) printk("No, enabling workaround\n"); + else panic("check_sigio : read failed, err = %d\n", n); } static void tty_close(int master, int slave) { printk("Checking that host ptys support SIGIO on close..."); - close(slave); + os_close_file(slave); if(got_sigio){ printk("Yes\n"); pty_close_sigio = 1; @@ -140,7 +132,8 @@ static void tty_close(int master, int sl void __init check_sigio(void) { - if(access("/dev/ptmx", R_OK) && access("/dev/ptyp0", R_OK)){ + if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) && + (os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){ printk("No pseudo-terminals available - skipping pty SIGIO " "check\n"); return; @@ -201,11 +194,10 @@ static int write_sigio_thread(void *unus p = &fds->poll[i]; if(p->revents == 0) continue; if(p->fd == sigio_private[1]){ - n = read(sigio_private[1], &c, sizeof(c)); + n = os_read_file(sigio_private[1], &c, sizeof(c)); if(n != sizeof(c)) printk("write_sigio_thread : " - "read failed, errno = %d\n", - errno); + "read failed, err = %d\n", -n); tmp = current_poll; current_poll = next_poll; next_poll = tmp; @@ -218,10 +210,10 @@ static int write_sigio_thread(void *unus (fds->used - i) * sizeof(*fds->poll)); } - n = write(respond_fd, &c, sizeof(c)); + n = os_write_file(respond_fd, &c, sizeof(c)); if(n != sizeof(c)) printk("write_sigio_thread : write failed, " - "errno = %d\n", errno); + "err = %d\n", -n); } } } @@ -252,15 +244,15 @@ static void update_thread(void) char c; flags = set_signals(0); - n = write(sigio_private[0], &c, sizeof(c)); + n = os_write_file(sigio_private[0], &c, sizeof(c)); if(n != sizeof(c)){ - printk("update_thread : write failed, errno = %d\n", errno); + printk("update_thread : write failed, err = %d\n", -n); goto fail; } - n = read(sigio_private[0], &c, sizeof(c)); + n = os_read_file(sigio_private[0], &c, sizeof(c)); if(n != sizeof(c)){ - printk("update_thread : read failed, errno = %d\n", errno); + printk("update_thread : read failed, err = %d\n", -n); goto fail; } @@ -271,10 +263,10 @@ static void update_thread(void) if(write_sigio_pid != -1) os_kill_process(write_sigio_pid, 1); write_sigio_pid = -1; - close(sigio_private[0]); - close(sigio_private[1]); - close(write_sigio_fds[0]); - close(write_sigio_fds[1]); + os_close_file(sigio_private[0]); + os_close_file(sigio_private[1]); + os_close_file(write_sigio_fds[0]); + os_close_file(write_sigio_fds[1]); sigio_unlock(); set_signals(flags); } @@ -369,15 +361,15 @@ void write_sigio_workaround(void) goto out; err = os_pipe(write_sigio_fds, 1, 1); - if(err){ + if(err < 0){ printk("write_sigio_workaround - os_pipe 1 failed, " - "errno = %d\n", -err); + "err = %d\n", -err); goto out; } err = os_pipe(sigio_private, 1, 1); - if(err){ + if(err < 0){ printk("write_sigio_workaround - os_pipe 2 failed, " - "errno = %d\n", -err); + "err = %d\n", -err); goto out_close1; } if(setup_initial_poll(sigio_private[1])) @@ -399,11 +391,11 @@ void write_sigio_workaround(void) os_kill_process(write_sigio_pid, 1); write_sigio_pid = -1; out_close2: - close(sigio_private[0]); - close(sigio_private[1]); + os_close_file(sigio_private[0]); + os_close_file(sigio_private[1]); out_close1: - close(write_sigio_fds[0]); - close(write_sigio_fds[1]); + os_close_file(write_sigio_fds[0]); + os_close_file(write_sigio_fds[1]); sigio_unlock(); } @@ -412,10 +404,16 @@ int read_sigio_fd(int fd) int n; char c; - n = read(fd, &c, sizeof(c)); + n = os_read_file(fd, &c, sizeof(c)); if(n != sizeof(c)){ - printk("read_sigio_fd - read failed, errno = %d\n", errno); - return(-errno); + if(n < 0) { + printk("read_sigio_fd - read failed, err = %d\n", -n); + return(n); + } + else { + printk("read_sigio_fd - short read, bytes = %d\n", n); + return(-EIO); + } } return(n); } --- linux-2.6.3-rc2/arch/um/kernel/signal_kern.c 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/um/kernel/signal_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -36,7 +36,7 @@ static void force_segv(int sig) if(sig == SIGSEGV){ struct k_sigaction *ka; - ka = ¤t->sig->action[SIGSEGV - 1]; + ka = ¤t->sighand->action[SIGSEGV - 1]; ka->sa.sa_handler = SIG_DFL; } force_sig(SIGSEGV, current); @@ -60,10 +60,10 @@ static int handle_signal(struct pt_regs int err, ret; ret = 0; + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; switch(error){ case -ERESTART_RESTARTBLOCK: - current_thread_info()->restart_block.fn = - do_no_restart_syscall; case -ERESTARTNOHAND: ret = -EINTR; break; @@ -142,7 +142,7 @@ static int kern_do_signal(struct pt_regs return(0); /* Whee! Actually deliver the signal. */ - ka = ¤t->sig->action[sig -1 ]; + ka = ¤t->sighand->action[sig -1 ]; err = handle_signal(regs, sig, ka, &info, oldset, error); if(!err) return(1); @@ -201,7 +201,7 @@ int sys_sigsuspend(int history0, int his } } -int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) +int sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) { sigset_t saveset, newset; @@ -227,20 +227,59 @@ int sys_rt_sigsuspend(sigset_t *unewset, } } +int sys_sigaction(int sig, const struct old_sigaction __user *act, + struct old_sigaction __user *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (verify_area(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +int sys_sigaltstack(const stack_t *uss, stack_t *uoss) +{ + return(do_sigaltstack(uss, uoss, PT_REGS_SP(¤t->thread.regs))); +} + +extern int userspace_pid[]; + static int copy_sc_from_user(struct pt_regs *to, void *from, struct arch_frame_data *arch) { int ret; ret = CHOOSE_MODE(copy_sc_from_user_tt(UPT_SC(&to->regs), from, arch), - copy_sc_from_user_skas(&to->regs, from)); + copy_sc_from_user_skas(userspace_pid[0], + &to->regs, from)); return(ret); } int sys_sigreturn(struct pt_regs regs) { - void *sc = sp_to_sc(PT_REGS_SP(¤t->thread.regs)); - void *mask = sp_to_mask(PT_REGS_SP(¤t->thread.regs)); + void __user *sc = sp_to_sc(PT_REGS_SP(¤t->thread.regs)); + void __user *mask = sp_to_mask(PT_REGS_SP(¤t->thread.regs)); int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); spin_lock_irq(¤t->sighand->siglock); @@ -257,8 +296,8 @@ int sys_sigreturn(struct pt_regs regs) int sys_rt_sigreturn(struct pt_regs regs) { - struct ucontext *uc = sp_to_uc(PT_REGS_SP(¤t->thread.regs)); - void *fp; + unsigned long sp = PT_REGS_SP(¤t->thread.regs); + struct ucontext __user *uc = sp_to_uc(sp); int sig_size = _NSIG_WORDS * sizeof(unsigned long); spin_lock_irq(¤t->sighand->siglock); @@ -266,7 +305,6 @@ int sys_rt_sigreturn(struct pt_regs regs sigdelsetmask(¤t->blocked, ~_BLOCKABLE); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - fp = (void *) (((unsigned long) uc) + sizeof(struct ucontext)); copy_sc_from_user(¤t->thread.regs, &uc->uc_mcontext, &signal_frame_si.common.arch); return(PT_REGS_SYSCALL_RET(¤t->thread.regs)); --- linux-2.6.3-rc2/arch/um/kernel/skas/include/mode.h 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/um/kernel/skas/include/mode.h 2004-02-12 00:40:26.000000000 -0800 @@ -12,14 +12,16 @@ extern unsigned long exec_fpx_regs[]; extern int have_fpx_regs; extern void user_time_init_skas(void); -extern int copy_sc_from_user_skas(union uml_pt_regs *regs, void *from_ptr); -extern int copy_sc_to_user_skas(void *to_ptr, void *fp, +extern int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, + void *from_ptr); +extern int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp, union uml_pt_regs *regs, unsigned long fault_addr, int fault_type); extern void sig_handler_common_skas(int sig, void *sc_ptr); extern void halt_skas(void); extern void reboot_skas(void); extern void kill_off_processes_skas(void); +extern int is_skas_winch(int pid, int fd, void *data); #endif --- linux-2.6.3-rc2/arch/um/kernel/skas/include/skas.h 2003-06-14 12:18:06.000000000 -0700 +++ 25/arch/um/kernel/skas/include/skas.h 2004-02-12 00:40:26.000000000 -0800 @@ -8,7 +8,7 @@ #include "sysdep/ptrace.h" -extern int userspace_pid; +extern int userspace_pid[]; extern void switch_threads(void *me, void *next); extern void thread_wait(void *sw, void *fb); @@ -32,7 +32,7 @@ extern int singlestepping_skas(void); extern int new_mm(int from); extern void save_registers(union uml_pt_regs *regs); extern void restore_registers(union uml_pt_regs *regs); -extern void start_userspace(void); +extern void start_userspace(int cpu); extern void init_registers(int pid); #endif --- linux-2.6.3-rc2/arch/um/kernel/skas/include/uaccess.h 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/um/kernel/skas/include/uaccess.h 2004-02-12 00:40:26.000000000 -0800 @@ -6,20 +6,10 @@ #ifndef __SKAS_UACCESS_H #define __SKAS_UACCESS_H -#include "linux/string.h" -#include "linux/sched.h" -#include "linux/err.h" -#include "asm/processor.h" -#include "asm/pgtable.h" -#include "asm/errno.h" -#include "asm/current.h" -#include "asm/a.out.h" -#include "kern_util.h" - #define access_ok_skas(type, addr, size) \ ((segment_eq(get_fs(), KERNEL_DS)) || \ (((unsigned long) (addr) < TASK_SIZE) && \ - ((unsigned long) (addr) + (size) < TASK_SIZE))) + ((unsigned long) (addr) + (size) <= TASK_SIZE))) static inline int verify_area_skas(int type, const void * addr, unsigned long size) @@ -27,197 +17,12 @@ static inline int verify_area_skas(int t return(access_ok_skas(type, addr, size) ? 0 : -EFAULT); } -static inline unsigned long maybe_map(unsigned long virt, int is_write) -{ - pte_t pte; - - void *phys = um_virt_to_phys(current, virt, &pte); - int dummy_code; - - if(IS_ERR(phys) || (is_write && !pte_write(pte))){ - if(handle_page_fault(virt, 0, is_write, 0, &dummy_code)) - return(0); - phys = um_virt_to_phys(current, virt, NULL); - } - return((unsigned long) __va((unsigned long) phys)); -} - -static inline int buffer_op(unsigned long addr, int len, - int (*op)(unsigned long addr, int len, void *arg), - void *arg) -{ - int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); - int remain = len, n; - - n = (*op)(addr, size, arg); - if(n != 0) - return(n < 0 ? remain : 0); - - addr += size; - remain -= size; - if(remain == 0) - return(0); - - while(addr < ((addr + remain) & PAGE_MASK)){ - n = (*op)(addr, PAGE_SIZE, arg); - if(n != 0) - return(n < 0 ? remain : 0); - - addr += PAGE_SIZE; - remain -= PAGE_SIZE; - } - if(remain == 0) - return(0); - - n = (*op)(addr, remain, arg); - if(n != 0) - return(n < 0 ? remain : 0); - return(0); -} - -static inline int copy_chunk_from_user(unsigned long from, int len, void *arg) -{ - unsigned long *to_ptr = arg, to = *to_ptr; - - from = maybe_map(from, 0); - if(from == 0) - return(-1); - - memcpy((void *) to, (void *) from, len); - *to_ptr += len; - return(0); -} - -static inline int copy_from_user_skas(void *to, const void *from, int n) -{ - if(segment_eq(get_fs(), KERNEL_DS)){ - memcpy(to, from, n); - return(0); - } - - return(access_ok_skas(VERIFY_READ, from, n) ? - buffer_op((unsigned long) from, n, copy_chunk_from_user, &to) : - n); -} - -static inline int copy_chunk_to_user(unsigned long to, int len, void *arg) -{ - unsigned long *from_ptr = arg, from = *from_ptr; - - to = maybe_map(to, 1); - if(to == 0) - return(-1); - - memcpy((void *) to, (void *) from, len); - *from_ptr += len; - return(0); -} - -static inline int copy_to_user_skas(void *to, const void *from, int n) -{ - if(segment_eq(get_fs(), KERNEL_DS)){ - memcpy(to, from, n); - return(0); - } - - return(access_ok_skas(VERIFY_WRITE, to, n) ? - buffer_op((unsigned long) to, n, copy_chunk_to_user, &from) : - n); -} - -static inline int strncpy_chunk_from_user(unsigned long from, int len, - void *arg) -{ - char **to_ptr = arg, *to = *to_ptr; - int n; - - from = maybe_map(from, 0); - if(from == 0) - return(-1); - - strncpy(to, (void *) from, len); - n = strnlen(to, len); - *to_ptr += n; - - if(n < len) - return(1); - return(0); -} - -static inline int strncpy_from_user_skas(char *dst, const char *src, int count) -{ - int n; - char *ptr = dst; - - if(segment_eq(get_fs(), KERNEL_DS)){ - strncpy(dst, src, count); - return(strnlen(dst, count)); - } - - if(!access_ok_skas(VERIFY_READ, src, 1)) - return(-EFAULT); - - n = buffer_op((unsigned long) src, count, strncpy_chunk_from_user, - &ptr); - if(n != 0) - return(-EFAULT); - return(strnlen(dst, count)); -} - -static inline int clear_chunk(unsigned long addr, int len, void *unused) -{ - addr = maybe_map(addr, 1); - if(addr == 0) - return(-1); - - memset((void *) addr, 0, len); - return(0); -} - -static inline int __clear_user_skas(void *mem, int len) -{ - return(buffer_op((unsigned long) mem, len, clear_chunk, NULL)); -} - -static inline int clear_user_skas(void *mem, int len) -{ - if(segment_eq(get_fs(), KERNEL_DS)){ - memset(mem, 0, len); - return(0); - } - - return(access_ok_skas(VERIFY_WRITE, mem, len) ? - buffer_op((unsigned long) mem, len, clear_chunk, NULL) : len); -} - -static inline int strnlen_chunk(unsigned long str, int len, void *arg) -{ - int *len_ptr = arg, n; - - str = maybe_map(str, 0); - if(str == 0) - return(-1); - - n = strnlen((void *) str, len); - *len_ptr += n; - - if(n < len) - return(1); - return(0); -} - -static inline int strnlen_user_skas(const void *str, int len) -{ - int count = 0, n; - - if(segment_eq(get_fs(), KERNEL_DS)) - return(strnlen(str, len) + 1); - - n = buffer_op((unsigned long) str, len, strnlen_chunk, &count); - if(n == 0) - return(count + 1); - return(-EFAULT); -} +extern int copy_from_user_skas(void *to, const void *from, int n); +extern int copy_to_user_skas(void *to, const void *from, int n); +extern int strncpy_from_user_skas(char *dst, const char *src, int count); +extern int __clear_user_skas(void *mem, int len); +extern int clear_user_skas(void *mem, int len); +extern int strnlen_user_skas(const void *str, int len); #endif --- linux-2.6.3-rc2/arch/um/kernel/skas/Makefile 2003-06-14 12:18:06.000000000 -0700 +++ 25/arch/um/kernel/skas/Makefile 2004-02-12 00:40:26.000000000 -0800 @@ -5,20 +5,24 @@ obj-y = exec_kern.o exec_user.o mem.o mem_user.o mmu.o process.o \ process_kern.o syscall_kern.o syscall_user.o time.o tlb.o trap_user.o \ - sys-$(SUBARCH)/ + uaccess.o sys-$(SUBARCH)/ + +host-progs := util/mk_ptregs +clean-files := include/skas_ptregs.h USER_OBJS = $(filter %_user.o,$(obj-y)) process.o time.o USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) -include/skas_ptregs.h : util/mk_ptregs - util/mk_ptregs > $@ - -util/mk_ptregs : - $(MAKE) -C util +$(TOPDIR)/arch/um/include/skas_ptregs.h : $(src)/util/mk_ptregs + @echo -n ' Generating $@' + @$< > $@.tmp + @if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + echo ' (unchanged)'; \ + rm -f $@.tmp; \ + else \ + echo ' (updated)'; \ + mv -f $@.tmp $@; \ + fi $(USER_OBJS) : %.o: %.c $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< - -clean : - $(MAKE) -C util clean - $(RM) -f include/skas_ptregs.h --- linux-2.6.3-rc2/arch/um/kernel/skas/mem_user.c 2003-06-14 12:18:07.000000000 -0700 +++ 25/arch/um/kernel/skas/mem_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -7,6 +7,7 @@ #include #include #include "mem_user.h" +#include "mem.h" #include "user.h" #include "os.h" #include "proc_mm.h" @@ -15,12 +16,12 @@ void map(int fd, unsigned long virt, uns int r, int w, int x) { struct proc_mm_op map; - struct mem_region *region; - int prot, n; + __u64 offset; + int prot, n, phys_fd; prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | (x ? PROT_EXEC : 0); - region = phys_region(phys); + phys_fd = phys_mapping(phys, &offset); map = ((struct proc_mm_op) { .op = MM_MMAP, .u = @@ -30,12 +31,12 @@ void map(int fd, unsigned long virt, uns .prot = prot, .flags = MAP_SHARED | MAP_FIXED, - .fd = region->fd, - .offset = phys_offset(phys) + .fd = phys_fd, + .offset = offset } } } ); n = os_write_file(fd, &map, sizeof(map)); if(n != sizeof(map)) - printk("map : /proc/mm map failed, errno = %d\n", errno); + printk("map : /proc/mm map failed, err = %d\n", -n); } int unmap(int fd, void *addr, int len) @@ -49,8 +50,13 @@ int unmap(int fd, void *addr, int len) { .addr = (unsigned long) addr, .len = len } } } ); n = os_write_file(fd, &unmap, sizeof(unmap)); - if((n != 0) && (n != sizeof(unmap))) - return(-errno); + if(n != sizeof(unmap)) { + if(n < 0) + return(n); + else if(n > 0) + return(-EIO); + } + return(0); } @@ -71,11 +77,15 @@ int protect(int fd, unsigned long addr, .prot = prot } } } ); n = os_write_file(fd, &protect, sizeof(protect)); - if((n != 0) && (n != sizeof(protect))){ + if(n != sizeof(protect)) { + if(n == 0) return(0); + if(must_succeed) - panic("protect failed, errno = %d", errno); - return(-errno); + panic("protect failed, err = %d", -n); + + return(-EIO); } + return(0); } --- linux-2.6.3-rc2/arch/um/kernel/skas/mmu.c 2003-06-14 12:18:52.000000000 -0700 +++ 25/arch/um/kernel/skas/mmu.c 2004-02-12 00:40:26.000000000 -0800 @@ -22,9 +22,11 @@ int init_new_context_skas(struct task_st else from = -1; mm->context.skas.mm_fd = new_mm(from); - if(mm->context.skas.mm_fd < 0) - panic("init_new_context_skas - new_mm failed, errno = %d\n", - mm->context.skas.mm_fd); + if(mm->context.skas.mm_fd < 0){ + printk("init_new_context_skas - new_mm failed, errno = %d\n", + mm->context.skas.mm_fd); + return(mm->context.skas.mm_fd); + } return(0); } --- linux-2.6.3-rc2/arch/um/kernel/skas/process.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/um/kernel/skas/process.c 2004-02-12 00:40:26.000000000 -0800 @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -24,6 +25,18 @@ #include "os.h" #include "proc_mm.h" #include "skas_ptrace.h" +#include "chan_user.h" + +int is_skas_winch(int pid, int fd, void *data) +{ + if(pid != getpid()) + return(0); + + register_winch_irq(-1, fd, -1, data); + return(1); +} + +/* These are set once at boot time and not changed thereafter */ unsigned long exec_regs[FRAME_SIZE]; unsigned long exec_fp_regs[HOST_FP_SIZE]; @@ -48,11 +61,11 @@ static void handle_trap(int pid, union u int err, syscall_nr, status; syscall_nr = PT_SYSCALL_NR(regs->skas.regs); + UPT_SYSCALL_NR(regs) = syscall_nr; if(syscall_nr < 1){ relay_signal(SIGTRAP, regs); return; } - UPT_SYSCALL_NR(regs) = syscall_nr; err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, __NR_getpid); if(err < 0) @@ -72,8 +85,6 @@ static void handle_trap(int pid, union u handle_syscall(regs); } -int userspace_pid; - static int userspace_tramp(void *arg) { init_new_thread_signals(0); @@ -83,7 +94,11 @@ static int userspace_tramp(void *arg) return(0); } -void start_userspace(void) +/* Each element set once, and only accessed by a single processor anyway */ +#define NR_CPUS 1 +int userspace_pid[NR_CPUS]; + +void start_userspace(int cpu) { void *stack; unsigned long sp; @@ -114,21 +129,21 @@ void start_userspace(void) if(munmap(stack, PAGE_SIZE) < 0) panic("start_userspace : munmap failed, errno = %d\n", errno); - userspace_pid = pid; + userspace_pid[cpu] = pid; } void userspace(union uml_pt_regs *regs) { - int err, status, op; + int err, status, op, pid = userspace_pid[0]; restore_registers(regs); - err = ptrace(PTRACE_SYSCALL, userspace_pid, 0, 0); + err = ptrace(PTRACE_SYSCALL, pid, 0, 0); if(err) panic("userspace - PTRACE_SYSCALL failed, errno = %d\n", errno); while(1){ - err = waitpid(userspace_pid, &status, WUNTRACED); + err = waitpid(pid, &status, WUNTRACED); if(err < 0) panic("userspace - waitpid failed, errno = %d\n", errno); @@ -139,16 +154,17 @@ void userspace(union uml_pt_regs *regs) if(WIFSTOPPED(status)){ switch(WSTOPSIG(status)){ case SIGSEGV: - handle_segv(userspace_pid); + handle_segv(pid); break; case SIGTRAP: - handle_trap(userspace_pid, regs); + handle_trap(pid, regs); break; case SIGIO: case SIGVTALRM: case SIGILL: case SIGBUS: case SIGFPE: + case SIGWINCH: user_signal(WSTOPSIG(status), regs); break; default: @@ -162,7 +178,7 @@ void userspace(union uml_pt_regs *regs) op = singlestepping_skas() ? PTRACE_SINGLESTEP : PTRACE_SYSCALL; - err = ptrace(op, userspace_pid, 0, 0); + err = ptrace(op, pid, 0, 0); if(err) panic("userspace - PTRACE_SYSCALL failed, " "errno = %d\n", errno); @@ -177,7 +193,7 @@ void new_thread(void *stack, void **swit *switch_buf_ptr = &switch_buf; *fork_buf_ptr = &fork_buf; - if(setjmp(fork_buf) == 0) + if(sigsetjmp(fork_buf, 1) == 0) new_thread_proc(stack, handler); remove_sigstack(); @@ -189,16 +205,16 @@ void thread_wait(void *sw, void *fb) *switch_buf = &buf; fork_buf = fb; - if(setjmp(buf) == 0) - longjmp(*fork_buf, 1); + if(sigsetjmp(buf, 1) == 0) + siglongjmp(*fork_buf, 1); } -static int move_registers(int int_op, int fp_op, union uml_pt_regs *regs, - unsigned long *fp_regs) +static int move_registers(int pid, int int_op, int fp_op, + union uml_pt_regs *regs, unsigned long *fp_regs) { - if(ptrace(int_op, userspace_pid, 0, regs->skas.regs) < 0) + if(ptrace(int_op, pid, 0, regs->skas.regs) < 0) return(-errno); - if(ptrace(fp_op, userspace_pid, 0, fp_regs) < 0) + if(ptrace(fp_op, pid, 0, fp_regs) < 0) return(-errno); return(0); } @@ -217,10 +233,11 @@ void save_registers(union uml_pt_regs *r fp_regs = regs->skas.fp; } - err = move_registers(PTRACE_GETREGS, fp_op, regs, fp_regs); + err = move_registers(userspace_pid[0], PTRACE_GETREGS, fp_op, regs, + fp_regs); if(err) panic("save_registers - saving registers failed, errno = %d\n", - err); + -err); } void restore_registers(union uml_pt_regs *regs) @@ -237,10 +254,11 @@ void restore_registers(union uml_pt_regs fp_regs = regs->skas.fp; } - err = move_registers(PTRACE_SETREGS, fp_op, regs, fp_regs); + err = move_registers(userspace_pid[0], PTRACE_SETREGS, fp_op, regs, + fp_regs); if(err) panic("restore_registers - saving registers failed, " - "errno = %d\n", err); + "errno = %d\n", -err); } void switch_threads(void *me, void *next) @@ -248,8 +266,8 @@ void switch_threads(void *me, void *next jmp_buf my_buf, **me_ptr = me, *next_buf = next; *me_ptr = &my_buf; - if(setjmp(my_buf) == 0) - longjmp(*next_buf, 1); + if(sigsetjmp(my_buf, 1) == 0) + siglongjmp(*next_buf, 1); } static jmp_buf initial_jmpbuf; @@ -265,14 +283,14 @@ int start_idle_thread(void *stack, void int n; *fork_buf_ptr = &initial_jmpbuf; - n = setjmp(initial_jmpbuf); + n = sigsetjmp(initial_jmpbuf, 1); if(n == 0) new_thread_proc((void *) stack, new_thread_handler); else if(n == 1) remove_sigstack(); else if(n == 2){ (*cb_proc)(cb_arg); - longjmp(*cb_back, 1); + siglongjmp(*cb_back, 1); } else if(n == 3){ kmalloc_ok = 0; @@ -282,7 +300,7 @@ int start_idle_thread(void *stack, void kmalloc_ok = 0; return(1); } - longjmp(**switch_buf, 1); + siglongjmp(**switch_buf, 1); } void remove_sigstack(void) @@ -304,8 +322,8 @@ void initial_thread_cb_skas(void (*proc) cb_back = &here; block_signals(); - if(setjmp(here) == 0) - longjmp(initial_jmpbuf, 2); + if(sigsetjmp(here, 1) == 0) + siglongjmp(initial_jmpbuf, 2); unblock_signals(); cb_proc = NULL; @@ -316,22 +334,23 @@ void initial_thread_cb_skas(void (*proc) void halt_skas(void) { block_signals(); - longjmp(initial_jmpbuf, 3); + siglongjmp(initial_jmpbuf, 3); } void reboot_skas(void) { block_signals(); - longjmp(initial_jmpbuf, 4); + siglongjmp(initial_jmpbuf, 4); } int new_mm(int from) { struct proc_mm_op copy; - int n, fd = os_open_file("/proc/mm", of_write(OPENFLAGS()), 0); + int n, fd = os_open_file("/proc/mm", + of_cloexec(of_write(OPENFLAGS())), 0); if(fd < 0) - return(-errno); + return(fd); if(from != -1){ copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS, @@ -340,8 +359,9 @@ int new_mm(int from) n = os_write_file(fd, ©, sizeof(copy)); if(n != sizeof(copy)) printk("new_mm : /proc/mm copy_segments failed, " - "errno = %d\n", errno); + "err = %d\n", -n); } + return(fd); } @@ -349,7 +369,8 @@ void switch_mm_skas(int mm_fd) { int err; - err = ptrace(PTRACE_SWITCH_MM, userspace_pid, 0, mm_fd); +#warning need cpu pid in switch_mm_skas + err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd); if(err) panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n", errno); @@ -357,7 +378,8 @@ void switch_mm_skas(int mm_fd) void kill_off_processes_skas(void) { - os_kill_process(userspace_pid, 1); +#warning need to loop over userspace_pids in kill_off_processes_skas + os_kill_process(userspace_pid[0], 1); } void init_registers(int pid) --- linux-2.6.3-rc2/arch/um/kernel/skas/process_kern.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/um/kernel/skas/process_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -61,11 +61,13 @@ void new_thread_handler(int sig) thread_wait(¤t->thread.mode.skas.switch_buf, current->thread.mode.skas.fork_buf); -#ifdef CONFIG_SMP - schedule_tail(NULL); -#endif + if(current->thread.prev_sched != NULL) + schedule_tail(current->thread.prev_sched); current->thread.prev_sched = NULL; + /* The return value is 1 if the kernel thread execs a process, + * 0 if it just exits + */ n = run_kernel_thread(fn, arg, ¤t->thread.exec_buf); if(n == 1) userspace(¤t->thread.regs.regs); @@ -93,9 +95,8 @@ void fork_handler(int sig) current->thread.mode.skas.fork_buf); force_flush_all(); -#ifdef CONFIG_SMP - schedule_tail(current->thread.prev_sched); -#endif + if(current->thread.prev_sched != NULL) + schedule_tail(current->thread.prev_sched); current->thread.prev_sched = NULL; unblock_signals(); @@ -136,7 +137,7 @@ int copy_thread_skas(int nr, unsigned lo void init_idle_skas(void) { - cpu_tasks[current->thread_info->cpu].pid = os_getpid(); + cpu_tasks[current_thread->cpu].pid = os_getpid(); default_idle(); } @@ -160,11 +161,11 @@ static int start_kernel_proc(void *unuse int start_uml_skas(void) { - start_userspace(); + start_userspace(0); capture_signal_stack(); + uml_idle_timer(); init_new_thread_signals(1); - idle_timer(); init_task.thread.request.u.thread.proc = start_kernel_proc; init_task.thread.request.u.thread.arg = NULL; @@ -175,12 +176,14 @@ int start_uml_skas(void) int external_pid_skas(struct task_struct *task) { - return(userspace_pid); +#warning Need to look up userspace_pid by cpu + return(userspace_pid[0]); } int thread_pid_skas(struct task_struct *task) { - return(userspace_pid); +#warning Need to look up userspace_pid by cpu + return(userspace_pid[0]); } /* --- linux-2.6.3-rc2/arch/um/kernel/skas/syscall_kern.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/um/kernel/skas/syscall_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ --- linux-2.6.3-rc2/arch/um/kernel/skas/sys-i386/sigcontext.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/kernel/skas/sys-i386/sigcontext.c 2004-02-12 00:40:26.000000000 -0800 @@ -12,10 +12,9 @@ #include "kern_util.h" #include "user.h" #include "sigcontext.h" +#include "mode.h" -extern int userspace_pid; - -int copy_sc_from_user_skas(union uml_pt_regs *regs, void *from_ptr) +int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, void *from_ptr) { struct sigcontext sc, *from = from_ptr; unsigned long fpregs[FP_FRAME_SIZE]; @@ -41,13 +40,12 @@ int copy_sc_from_user_skas(union uml_pt_ regs->skas.regs[EIP] = sc.eip; regs->skas.regs[CS] = sc.cs; regs->skas.regs[EFL] = sc.eflags; - regs->skas.regs[UESP] = sc.esp_at_signal; regs->skas.regs[SS] = sc.ss; regs->skas.fault_addr = sc.cr2; regs->skas.fault_type = FAULT_WRITE(sc.err); regs->skas.trap_type = sc.trapno; - err = ptrace(PTRACE_SETFPREGS, userspace_pid, 0, fpregs); + err = ptrace(PTRACE_SETFPREGS, pid, 0, fpregs); if(err < 0){ printk("copy_sc_to_user - PTRACE_SETFPREGS failed, " "errno = %d\n", errno); @@ -57,8 +55,9 @@ int copy_sc_from_user_skas(union uml_pt_ return(0); } -int copy_sc_to_user_skas(void *to_ptr, void *fp, union uml_pt_regs *regs, - unsigned long fault_addr, int fault_type) +int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp, + union uml_pt_regs *regs, unsigned long fault_addr, + int fault_type) { struct sigcontext sc, *to = to_ptr; struct _fpstate *to_fp; @@ -86,7 +85,7 @@ int copy_sc_to_user_skas(void *to_ptr, v sc.err = TO_SC_ERR(fault_type); sc.trapno = regs->skas.trap_type; - err = ptrace(PTRACE_GETFPREGS, userspace_pid, 0, fpregs); + err = ptrace(PTRACE_GETFPREGS, pid, 0, fpregs); if(err < 0){ printk("copy_sc_to_user - PTRACE_GETFPREGS failed, " "errno = %d\n", errno); --- linux-2.6.3-rc2/arch/um/kernel/skas/trap_user.c 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/um/kernel/skas/trap_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -41,8 +41,6 @@ void user_signal(int sig, union uml_pt_r { struct signal_info *info; - if(sig == SIGVTALRM) - missed_ticks[cpu()]++; regs->skas.is_user = 1; regs->skas.fault_addr = 0; regs->skas.fault_type = 0; --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/kernel/skas/uaccess.c 2004-02-12 00:40:26.000000000 -0800 @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/stddef.h" +#include "linux/kernel.h" +#include "linux/string.h" +#include "linux/fs.h" +#include "linux/highmem.h" +#include "asm/page.h" +#include "asm/pgtable.h" +#include "asm/uaccess.h" +#include "kern_util.h" + +extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr, + pte_t *pte_out); + +static unsigned long maybe_map(unsigned long virt, int is_write) +{ + pte_t pte; + int err; + + void *phys = um_virt_to_phys(current, virt, &pte); + int dummy_code; + + if(IS_ERR(phys) || (is_write && !pte_write(pte))){ + err = handle_page_fault(virt, 0, is_write, 0, &dummy_code); + if(err) + return(0); + phys = um_virt_to_phys(current, virt, NULL); + } + return((unsigned long) phys); +} + +static int do_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long addr, int len, void *arg), void *arg) +{ + struct page *page; + int n; + + addr = maybe_map(addr, is_write); + if(addr == -1) + return(-1); + + page = phys_to_page(addr); + addr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); + n = (*op)(addr, len, arg); + kunmap(page); + + return(n); +} + +static int buffer_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long addr, int len, void *arg), + void *arg) +{ + int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); + int remain = len, n; + + n = do_op(addr, size, is_write, op, arg); + if(n != 0) + return(n < 0 ? remain : 0); + + addr += size; + remain -= size; + if(remain == 0) + return(0); + + while(addr < ((addr + remain) & PAGE_MASK)){ + n = do_op(addr, PAGE_SIZE, is_write, op, arg); + if(n != 0) + return(n < 0 ? remain : 0); + + addr += PAGE_SIZE; + remain -= PAGE_SIZE; + } + if(remain == 0) + return(0); + + n = do_op(addr, remain, is_write, op, arg); + if(n != 0) + return(n < 0 ? remain : 0); + return(0); +} + +static int copy_chunk_from_user(unsigned long from, int len, void *arg) +{ + unsigned long *to_ptr = arg, to = *to_ptr; + + memcpy((void *) to, (void *) from, len); + *to_ptr += len; + return(0); +} + +int copy_from_user_skas(void *to, const void *from, int n) +{ + if(segment_eq(get_fs(), KERNEL_DS)){ + memcpy(to, from, n); + return(0); + } + + return(access_ok_skas(VERIFY_READ, from, n) ? + buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): + n); +} + +static int copy_chunk_to_user(unsigned long to, int len, void *arg) +{ + unsigned long *from_ptr = arg, from = *from_ptr; + + memcpy((void *) to, (void *) from, len); + *from_ptr += len; + return(0); +} + +int copy_to_user_skas(void *to, const void *from, int n) +{ + if(segment_eq(get_fs(), KERNEL_DS)){ + memcpy(to, from, n); + return(0); + } + + return(access_ok_skas(VERIFY_WRITE, to, n) ? + buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : + n); +} + +static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) +{ + char **to_ptr = arg, *to = *to_ptr; + int n; + + strncpy(to, (void *) from, len); + n = strnlen(to, len); + *to_ptr += n; + + if(n < len) + return(1); + return(0); +} + +int strncpy_from_user_skas(char *dst, const char *src, int count) +{ + int n; + char *ptr = dst; + + if(segment_eq(get_fs(), KERNEL_DS)){ + strncpy(dst, src, count); + return(strnlen(dst, count)); + } + + if(!access_ok_skas(VERIFY_READ, src, 1)) + return(-EFAULT); + + n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, + &ptr); + if(n != 0) + return(-EFAULT); + return(strnlen(dst, count)); +} + +static int clear_chunk(unsigned long addr, int len, void *unused) +{ + memset((void *) addr, 0, len); + return(0); +} + +int __clear_user_skas(void *mem, int len) +{ + return(buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL)); +} + +int clear_user_skas(void *mem, int len) +{ + if(segment_eq(get_fs(), KERNEL_DS)){ + memset(mem, 0, len); + return(0); + } + + return(access_ok_skas(VERIFY_WRITE, mem, len) ? + buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len); +} + +static int strnlen_chunk(unsigned long str, int len, void *arg) +{ + int *len_ptr = arg, n; + + n = strnlen((void *) str, len); + *len_ptr += n; + + if(n < len) + return(1); + return(0); +} + +int strnlen_user_skas(const void *str, int len) +{ + int count = 0, n; + + if(segment_eq(get_fs(), KERNEL_DS)) + return(strnlen(str, len) + 1); + + n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count); + if(n == 0) + return(count + 1); + return(-EFAULT); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.3-rc2/arch/um/kernel/skas/util/Makefile 2003-06-14 12:18:30.000000000 -0700 +++ 25/arch/um/kernel/skas/util/Makefile 2004-02-12 00:40:26.000000000 -0800 @@ -1,10 +1,10 @@ all: mk_ptregs mk_ptregs : mk_ptregs.o - $(CC) -o mk_ptregs mk_ptregs.o + $(HOSTCC) -o mk_ptregs mk_ptregs.o mk_ptregs.o : mk_ptregs.c - $(CC) -c $< + $(HOSTCC) -c $< clean : $(RM) -f mk_ptregs *.o *~ --- linux-2.6.3-rc2/arch/um/kernel/skas/util/mk_ptregs.c 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/um/kernel/skas/util/mk_ptregs.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,3 +1,4 @@ +#include #include #include --- linux-2.6.3-rc2/arch/um/kernel/smp.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/smp.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,9 +1,15 @@ /* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ #include "linux/config.h" +#include "linux/percpu.h" +#include "asm/pgalloc.h" +#include "asm/tlb.h" + +/* For some reason, mmu_gathers are referenced when CONFIG_SMP is off. */ +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); #ifdef CONFIG_SMP @@ -23,7 +29,7 @@ #include "os.h" /* CPU online map, set by smp_boot_cpus */ -unsigned long cpu_online_map = cpumask_of_cpu(0); +unsigned long cpu_online_map = CPU_MASK_NONE; EXPORT_SYMBOL(cpu_online_map); @@ -55,7 +61,7 @@ struct task_struct *idle_threads[NR_CPUS void smp_send_reschedule(int cpu) { - write(cpu_data[cpu].ipi_pipe[1], "R", 1); + os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1); num_reschedules_sent++; } @@ -100,35 +106,34 @@ void smp_send_stop(void) printk(KERN_INFO "Stopping all CPUs..."); for(i = 0; i < num_online_cpus(); i++){ - if(i == current->thread_info->cpu) + if(i == current_thread->cpu) continue; - write(cpu_data[i].ipi_pipe[1], "S", 1); + os_write_file(cpu_data[i].ipi_pipe[1], "S", 1); } printk("done\n"); } -static cpumask_t smp_commenced_mask; -static cpumask_t smp_callin_map = CPU_MASK_NONE; +static cpumask_t smp_commenced_mask = CPU_MASK_NONE; +static cpumask_t cpu_callin_map = CPU_MASK_NONE; static int idle_proc(void *cpup) { int cpu = (int) cpup, err; err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1); - if(err) - panic("CPU#%d failed to create IPI pipe, errno = %d", cpu, - -err); + if(err < 0) + panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err); activate_ipi(cpu_data[cpu].ipi_pipe[0], current->thread.mode.tt.extern_pid); wmb(); - if (cpu_test_and_set(cpu, &smp_callin_map)) { + if (cpu_test_and_set(cpu, cpu_callin_map)) { printk("huh, CPU#%d already present??\n", cpu); BUG(); } - while (!cpu_isset(cpu, &smp_commenced_mask)) + while (!cpu_isset(cpu, smp_commenced_mask)) cpu_relax(); cpu_set(cpu, cpu_online_map); @@ -143,16 +148,20 @@ static struct task_struct *idle_thread(i current->thread.request.u.thread.proc = idle_proc; current->thread.request.u.thread.arg = (void *) cpu; - new_task = do_fork(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL, NULL); - if(IS_ERR(new_task)) panic("do_fork failed in idle_thread"); + new_task = copy_process(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL, + NULL); + if(IS_ERR(new_task)) + panic("copy_process failed in idle_thread, error = %ld", + PTR_ERR(new_task)); cpu_tasks[cpu] = ((struct cpu_task) { .pid = new_task->thread.mode.tt.extern_pid, .task = new_task } ); idle_threads[cpu] = new_task; - CHOOSE_MODE(write(new_task->thread.mode.tt.switch_pipe[1], &c, + CHOOSE_MODE(os_write_file(new_task->thread.mode.tt.switch_pipe[1], &c, sizeof(c)), ({ panic("skas mode doesn't support SMP"); })); + wake_up_forked_process(new_task); return(new_task); } @@ -160,15 +169,17 @@ void smp_prepare_cpus(unsigned int maxcp { struct task_struct *idle; unsigned long waittime; - int err, cpu; + int err, cpu, me = smp_processor_id(); - cpu_set(0, cpu_online_map); - cpu_set(0, smp_callin_map); + cpu_clear(me, cpu_online_map); + cpu_set(me, cpu_online_map); + cpu_set(me, cpu_callin_map); - err = os_pipe(cpu_data[0].ipi_pipe, 1, 1); - if(err) panic("CPU#0 failed to create IPI pipe, errno = %d", -err); + err = os_pipe(cpu_data[me].ipi_pipe, 1, 1); + if(err < 0) + panic("CPU#0 failed to create IPI pipe, errno = %d", -err); - activate_ipi(cpu_data[0].ipi_pipe[0], + activate_ipi(cpu_data[me].ipi_pipe[0], current->thread.mode.tt.extern_pid); for(cpu = 1; cpu < ncpus; cpu++){ @@ -180,10 +191,10 @@ void smp_prepare_cpus(unsigned int maxcp unhash_process(idle); waittime = 200000000; - while (waittime-- && !cpu_isset(cpu, smp_callin_map)) + while (waittime-- && !cpu_isset(cpu, cpu_callin_map)) cpu_relax(); - if (cpu_isset(cpu, smp_callin_map)) + if (cpu_isset(cpu, cpu_callin_map)) printk("done\n"); else printk("failed\n"); } @@ -216,7 +227,7 @@ void IPI_handler(int cpu) int fd; fd = cpu_data[cpu].ipi_pipe[0]; - while (read(fd, &c, 1) == 1) { + while (os_read_file(fd, &c, 1) == 1) { switch (c) { case 'C': smp_call_function_slave(cpu); @@ -273,9 +284,9 @@ int smp_call_function(void (*_func)(void info = _info; for (i=0;ithread_info->cpu) && + if((i != current_thread->cpu) && cpu_isset(i, cpu_online_map)) - write(cpu_data[i].ipi_pipe[1], "C", 1); + os_write_file(cpu_data[i].ipi_pipe[1], "C", 1); while (atomic_read(&scf_started) != cpus) barrier(); --- linux-2.6.3-rc2/arch/um/kernel/syscall_kern.c 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/kernel/syscall_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -35,39 +35,40 @@ long um_mount(char * dev_name, char * di long sys_fork(void) { - struct task_struct *p; + long ret; current->thread.forking = 1; - p = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL); + ret = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL); current->thread.forking = 0; - return(IS_ERR(p) ? PTR_ERR(p) : p->pid); + return(ret); } -long sys_clone(unsigned long clone_flags, unsigned long newsp) +long sys_clone(unsigned long clone_flags, unsigned long newsp, + int *parent_tid, int *child_tid) { - struct task_struct *p; + long ret; current->thread.forking = 1; - p = do_fork(clone_flags, newsp, NULL, 0, NULL, NULL); + ret = do_fork(clone_flags, newsp, NULL, 0, parent_tid, child_tid); current->thread.forking = 0; - return(IS_ERR(p) ? PTR_ERR(p) : p->pid); + return(ret); } long sys_vfork(void) { - struct task_struct *p; + long ret; current->thread.forking = 1; - p = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, NULL); + ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, + NULL); current->thread.forking = 0; - return(IS_ERR(p) ? PTR_ERR(p) : p->pid); + return(ret); } /* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) +long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, unsigned long fd, + unsigned long pgoff) { int error = -EBADF; struct file * file = NULL; @@ -79,9 +80,9 @@ static inline long do_mmap2( goto out; } - down_write(¤t->mm->mmap_sem); + down_write(&mm->mmap_sem); error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); + up_write(&mm->mmap_sem); if (file) fput(file); @@ -93,7 +94,7 @@ long sys_mmap2(unsigned long addr, unsig unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { - return do_mmap2(addr, len, prot, flags, fd, pgoff); + return do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff); } /* @@ -120,7 +121,8 @@ int old_mmap(unsigned long addr, unsigne if (offset & ~PAGE_MASK) goto out; - err = do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); + err = do_mmap2(current->mm, addr, len, prot, flags, fd, + offset >> PAGE_SHIFT); out: return err; } @@ -135,43 +137,12 @@ int sys_pipe(unsigned long * fildes) error = do_pipe(fd); if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) + if (copy_to_user(fildes, fd, sizeof(fd))) error = -EFAULT; } return error; } -int sys_sigaction(int sig, const struct old_sigaction *act, - struct old_sigaction *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if (act) { - old_sigset_t mask; - if (verify_area(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) - return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) - return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - } - - return ret; -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. * @@ -253,7 +224,7 @@ int sys_ipc (uint call, int first, int s return sys_shmctl (first, second, (struct shmid_ds *) ptr); default: - return -EINVAL; + return -ENOSYS; } } @@ -302,11 +273,6 @@ int sys_olduname(struct oldold_utsname * return error; } -int sys_sigaltstack(const stack_t *uss, stack_t *uoss) -{ - return(do_sigaltstack(uss, uoss, PT_REGS_SP(¤t->thread.regs))); -} - long execute_syscall(void *r) { return(CHOOSE_MODE_PROC(execute_syscall_tt, execute_syscall_skas, r)); --- linux-2.6.3-rc2/arch/um/kernel/sys_call_table.c 2003-06-14 12:18:30.000000000 -0700 +++ 25/arch/um/kernel/sys_call_table.c 2004-02-12 00:40:57.000000000 -0800 @@ -5,9 +5,9 @@ #include "linux/config.h" #include "linux/unistd.h" -#include "linux/version.h" #include "linux/sys.h" #include "linux/swap.h" +#include "linux/syscalls.h" #include "linux/sysctl.h" #include "asm/signal.h" #include "sysdep/syscalls.h" @@ -152,7 +152,6 @@ extern syscall_handler_t sys_mlockall; extern syscall_handler_t sys_munlockall; extern syscall_handler_t sys_sched_setparam; extern syscall_handler_t sys_sched_getparam; -extern syscall_handler_t sys_sched_setscheduler; extern syscall_handler_t sys_sched_getscheduler; extern syscall_handler_t sys_sched_get_priority_max; extern syscall_handler_t sys_sched_get_priority_min; @@ -219,15 +218,30 @@ extern syscall_handler_t sys_getdents64; extern syscall_handler_t sys_gettid; extern syscall_handler_t sys_readahead; extern syscall_handler_t sys_tkill; +extern syscall_handler_t sys_setxattr; +extern syscall_handler_t sys_lsetxattr; +extern syscall_handler_t sys_fsetxattr; +extern syscall_handler_t sys_getxattr; +extern syscall_handler_t sys_lgetxattr; +extern syscall_handler_t sys_fgetxattr; +extern syscall_handler_t sys_listxattr; +extern syscall_handler_t sys_llistxattr; +extern syscall_handler_t sys_flistxattr; +extern syscall_handler_t sys_removexattr; +extern syscall_handler_t sys_lremovexattr; +extern syscall_handler_t sys_fremovexattr; extern syscall_handler_t sys_sendfile64; extern syscall_handler_t sys_futex; extern syscall_handler_t sys_sched_setaffinity; extern syscall_handler_t sys_sched_getaffinity; +extern syscall_handler_t sys_set_thread_area; +extern syscall_handler_t sys_get_thread_area; extern syscall_handler_t sys_io_setup; extern syscall_handler_t sys_io_destroy; extern syscall_handler_t sys_io_getevents; extern syscall_handler_t sys_io_submit; extern syscall_handler_t sys_io_cancel; +extern syscall_handler_t sys_fadvise64; extern syscall_handler_t sys_exit_group; extern syscall_handler_t sys_lookup_dcookie; extern syscall_handler_t sys_epoll_create; @@ -235,6 +249,20 @@ extern syscall_handler_t sys_epoll_ctl; extern syscall_handler_t sys_epoll_wait; extern syscall_handler_t sys_remap_file_pages; extern syscall_handler_t sys_set_tid_address; +extern syscall_handler_t sys_timer_create; +extern syscall_handler_t sys_timer_settime; +extern syscall_handler_t sys_timer_gettime; +extern syscall_handler_t sys_timer_getoverrun; +extern syscall_handler_t sys_timer_delete; +extern syscall_handler_t sys_clock_settime; +extern syscall_handler_t sys_clock_gettime; +extern syscall_handler_t sys_clock_getres; +extern syscall_handler_t sys_clock_nanosleep; +extern syscall_handler_t sys_statfs64; +extern syscall_handler_t sys_fstatfs64; +extern syscall_handler_t sys_tgkill; +extern syscall_handler_t sys_utimes; +extern syscall_handler_t sys_fadvise64_64; #ifdef CONFIG_NFSD #define NFSSERVCTL sys_nfsservctl @@ -246,7 +274,7 @@ extern syscall_handler_t um_mount; extern syscall_handler_t um_time; extern syscall_handler_t um_stime; -#define LAST_GENERIC_SYSCALL __NR_set_tid_address +#define LAST_GENERIC_SYSCALL __NR_vserver #if LAST_GENERIC_SYSCALL > LAST_ARCH_SYSCALL #define LAST_SYSCALL LAST_GENERIC_SYSCALL @@ -268,9 +296,9 @@ syscall_handler_t *sys_call_table[] = { [ __NR_creat ] = sys_creat, [ __NR_link ] = sys_link, [ __NR_unlink ] = sys_unlink, + [ __NR_execve ] = (syscall_handler_t *) sys_execve, /* declared differently in kern_util.h */ - [ __NR_execve ] = (syscall_handler_t *) sys_execve, [ __NR_chdir ] = sys_chdir, [ __NR_time ] = um_time, [ __NR_mknod ] = sys_mknod, @@ -413,7 +441,7 @@ syscall_handler_t *sys_call_table[] = { [ __NR_munlockall ] = sys_munlockall, [ __NR_sched_setparam ] = sys_sched_setparam, [ __NR_sched_getparam ] = sys_sched_getparam, - [ __NR_sched_setscheduler ] = sys_sched_setscheduler, + [ __NR_sched_setscheduler ] = (syscall_handler_t *) sys_sched_setscheduler, [ __NR_sched_getscheduler ] = sys_sched_getscheduler, [ __NR_sched_yield ] = (syscall_handler_t *) yield, [ __NR_sched_get_priority_max ] = sys_sched_get_priority_max, @@ -455,32 +483,37 @@ syscall_handler_t *sys_call_table[] = { [ __NR_stat64 ] = sys_stat64, [ __NR_lstat64 ] = sys_lstat64, [ __NR_fstat64 ] = sys_fstat64, - [ __NR_fcntl64 ] = sys_fcntl64, [ __NR_getdents64 ] = sys_getdents64, + [ __NR_fcntl64 ] = sys_fcntl64, + [ 223 ] = sys_ni_syscall, [ __NR_gettid ] = sys_gettid, [ __NR_readahead ] = sys_readahead, - [ __NR_setxattr ] = sys_ni_syscall, - [ __NR_lsetxattr ] = sys_ni_syscall, - [ __NR_fsetxattr ] = sys_ni_syscall, - [ __NR_getxattr ] = sys_ni_syscall, - [ __NR_lgetxattr ] = sys_ni_syscall, - [ __NR_fgetxattr ] = sys_ni_syscall, - [ __NR_listxattr ] = sys_ni_syscall, - [ __NR_llistxattr ] = sys_ni_syscall, - [ __NR_flistxattr ] = sys_ni_syscall, - [ __NR_removexattr ] = sys_ni_syscall, - [ __NR_lremovexattr ] = sys_ni_syscall, - [ __NR_fremovexattr ] = sys_ni_syscall, + [ __NR_setxattr ] = sys_setxattr, + [ __NR_lsetxattr ] = sys_lsetxattr, + [ __NR_fsetxattr ] = sys_fsetxattr, + [ __NR_getxattr ] = sys_getxattr, + [ __NR_lgetxattr ] = sys_lgetxattr, + [ __NR_fgetxattr ] = sys_fgetxattr, + [ __NR_listxattr ] = sys_listxattr, + [ __NR_llistxattr ] = sys_llistxattr, + [ __NR_flistxattr ] = sys_flistxattr, + [ __NR_removexattr ] = sys_removexattr, + [ __NR_lremovexattr ] = sys_lremovexattr, + [ __NR_fremovexattr ] = sys_fremovexattr, [ __NR_tkill ] = sys_tkill, [ __NR_sendfile64 ] = sys_sendfile64, [ __NR_futex ] = sys_futex, [ __NR_sched_setaffinity ] = sys_sched_setaffinity, [ __NR_sched_getaffinity ] = sys_sched_getaffinity, + [ __NR_set_thread_area ] = sys_ni_syscall, + [ __NR_get_thread_area ] = sys_ni_syscall, [ __NR_io_setup ] = sys_io_setup, [ __NR_io_destroy ] = sys_io_destroy, [ __NR_io_getevents ] = sys_io_getevents, [ __NR_io_submit ] = sys_io_submit, [ __NR_io_cancel ] = sys_io_cancel, + [ __NR_fadvise64 ] = sys_fadvise64, + [ 251 ] = sys_ni_syscall, [ __NR_exit_group ] = sys_exit_group, [ __NR_lookup_dcookie ] = sys_lookup_dcookie, [ __NR_epoll_create ] = sys_epoll_create, @@ -488,6 +521,21 @@ syscall_handler_t *sys_call_table[] = { [ __NR_epoll_wait ] = sys_epoll_wait, [ __NR_remap_file_pages ] = sys_remap_file_pages, [ __NR_set_tid_address ] = sys_set_tid_address, + [ __NR_timer_create ] = sys_timer_create, + [ __NR_timer_settime ] = sys_timer_settime, + [ __NR_timer_gettime ] = sys_timer_gettime, + [ __NR_timer_getoverrun ] = sys_timer_getoverrun, + [ __NR_timer_delete ] = sys_timer_delete, + [ __NR_clock_settime ] = sys_clock_settime, + [ __NR_clock_gettime ] = sys_clock_gettime, + [ __NR_clock_getres ] = sys_clock_getres, + [ __NR_clock_nanosleep ] = sys_clock_nanosleep, + [ __NR_statfs64 ] = sys_statfs64, + [ __NR_fstatfs64 ] = sys_fstatfs64, + [ __NR_tgkill ] = sys_tgkill, + [ __NR_utimes ] = sys_utimes, + [ __NR_fadvise64_64 ] = sys_fadvise64_64, + [ __NR_vserver ] = sys_ni_syscall, ARCH_SYSCALLS [ LAST_SYSCALL + 1 ... NR_syscalls ] = --- linux-2.6.3-rc2/arch/um/kernel/sysrq.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/sysrq.c 2004-02-12 00:40:26.000000000 -0800 @@ -55,6 +55,14 @@ void show_trace_task(struct task_struct show_trace((unsigned long *)esp); } +void show_stack(struct task_struct *task, unsigned long *sp) +{ + if(task) + show_trace_task(task); + else + show_trace(sp); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- linux-2.6.3-rc2/arch/um/kernel/tempfile.c 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/um/kernel/tempfile.c 2004-02-12 00:40:26.000000000 -0800 @@ -28,6 +28,7 @@ static void __init find_tempdir(void) } if((dir == NULL) || (*dir == '\0')) dir = "/tmp"; + tempdir = malloc(strlen(dir) + 2); if(tempdir == NULL){ fprintf(stderr, "Failed to malloc tempdir, " @@ -49,7 +50,8 @@ int make_tempfile(const char *template, else *tempname = 0; strcat(tempname, template); - if((fd = mkstemp(tempname)) < 0){ + fd = mkstemp(tempname); + if(fd < 0){ fprintf(stderr, "open - cannot create %s: %s\n", tempname, strerror(errno)); return -1; @@ -59,7 +61,8 @@ int make_tempfile(const char *template, return -1; } if(out_tempname){ - if((*out_tempname = strdup(tempname)) == NULL){ + *out_tempname = strdup(tempname); + if(*out_tempname == NULL){ perror("strdup"); return -1; } --- linux-2.6.3-rc2/arch/um/kernel/time.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/time.c 2004-02-12 00:40:26.000000000 -0800 @@ -4,24 +4,33 @@ */ #include +#include #include #include #include #include #include -#include "linux/module.h" #include "user_util.h" #include "kern_util.h" #include "user.h" #include "process.h" #include "signal_user.h" #include "time_user.h" +#include "kern_constants.h" + +/* XXX This really needs to be declared and initialized in a kernel file since + * it's in + */ +extern struct timespec wall_to_monotonic; extern struct timeval xtime; +struct timeval local_offset = { 0, 0 }; + void timer(void) { gettimeofday(&xtime, NULL); + timeradd(&xtime, &local_offset, &xtime); } void set_interval(int timer_type) @@ -66,7 +75,7 @@ void switch_timers(int to_real) errno); } -void idle_timer(void) +void uml_idle_timer(void) { if(signal(SIGVTALRM, SIG_IGN) == SIG_ERR) panic("Couldn't unset SIGVTALRM handler"); @@ -76,14 +85,54 @@ void idle_timer(void) set_interval(ITIMER_REAL); } +static unsigned long long get_host_hz(void) +{ + char mhzline[16], *end; + int ret, mult, mhz, rest, len; + + ret = cpu_feature("cpu MHz", mhzline, + sizeof(mhzline) / sizeof(mhzline[0])); + if(!ret) + panic ("Could not get host MHZ"); + + mhz = strtoul(mhzline, &end, 10); + + /* This business is to parse a floating point number without using + * floating types. + */ + + rest = 0; + mult = 0; + if(*end == '.'){ + end++; + len = strlen(end); + if(len < 6) + mult = 6 - len; + else if(len > 6) + end[6] = '\0'; + rest = strtoul(end, NULL, 10); + while(mult-- > 0) + rest *= 10; + } + + return(1000000 * mhz + rest); +} + +unsigned long long host_hz = 0; + void time_init(void) { + struct timespec now; + + host_hz = get_host_hz(); if(signal(SIGVTALRM, boot_timer_handler) == SIG_ERR) panic("Couldn't set SIGVTALRM handler"); set_interval(ITIMER_VIRTUAL); -} -struct timeval local_offset = { 0, 0 }; + do_posix_clock_monotonic_gettime(&now); + wall_to_monotonic.tv_sec = -now.tv_sec; + wall_to_monotonic.tv_nsec = -now.tv_nsec; +} void do_gettimeofday(struct timeval *tv) { @@ -95,15 +144,13 @@ void do_gettimeofday(struct timeval *tv) time_unlock(flags); } -EXPORT_SYMBOL(do_gettimeofday); - int do_settimeofday(struct timespec *tv) { struct timeval now; unsigned long flags; struct timeval tv_in; - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) + if ((unsigned long) tv->tv_nsec >= UM_NSEC_PER_SEC) return -EINVAL; tv_in.tv_sec = tv->tv_sec; @@ -113,9 +160,9 @@ int do_settimeofday(struct timespec *tv) gettimeofday(&now, NULL); timersub(&tv_in, &now, &local_offset); time_unlock(flags); -} -EXPORT_SYMBOL(do_settimeofday); + return(0); +} void idle_sleep(int secs) { --- linux-2.6.3-rc2/arch/um/kernel/time_kern.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/time_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -30,6 +30,14 @@ int hz(void) return(HZ); } +/* + * Scheduler clock - returns current time in nanosec units. + */ +unsigned long long sched_clock(void) +{ + return (unsigned long long)jiffies_64 * (1000000000 / HZ); +} + /* Changed at early boot */ int timer_irq_inited = 0; @@ -39,13 +47,47 @@ int timer_irq_inited = 0; */ int __attribute__ ((__section__ (".unprotected"))) missed_ticks[NR_CPUS]; +static int first_tick; +static unsigned long long prev_tsc; +static long long delta; /* Deviation per interval */ + +extern unsigned long long host_hz; + void timer_irq(union uml_pt_regs *regs) { - int cpu = current->thread_info->cpu, ticks = missed_ticks[cpu]; + unsigned long long ticks = 0; + + if(!timer_irq_inited){ + /* This is to ensure that ticks don't pile up when + * the timer handler is suspended */ + first_tick = 0; + return; + } + + if(first_tick){ +#if defined(CONFIG_UML_REAL_TIME_CLOCK) + unsigned long long tsc; + /* We've had 1 tick */ + tsc = time_stamp(); + + delta += tsc - prev_tsc; + prev_tsc = tsc; + + ticks += (delta * HZ) / host_hz; + delta -= (ticks * host_hz) / HZ; +#else + ticks = 1; +#endif + } + else { + prev_tsc = time_stamp(); + first_tick = 1; + } - if(!timer_irq_inited) return; - missed_ticks[cpu] = 0; - while(ticks--) do_IRQ(TIMER_IRQ, regs); + while(ticks > 0){ + do_IRQ(TIMER_IRQ, regs); + ticks--; + } } void boot_timer_handler(int sig) @@ -58,12 +100,13 @@ void boot_timer_handler(int sig) do_timer(®s); } -void um_timer(int irq, void *dev, struct pt_regs *regs) +irqreturn_t um_timer(int irq, void *dev, struct pt_regs *regs) { do_timer(regs); - write_seqlock(&xtime_lock); + write_seqlock_irq(&xtime_lock); timer(); - write_sequnlock(&xtime_lock); + write_sequnlock_irq(&xtime_lock); + return(IRQ_HANDLED); } long um_time(int * tloc) @@ -81,12 +124,12 @@ long um_time(int * tloc) long um_stime(int * tptr) { int value; - struct timeval new; + struct timespec new; if (get_user(value, tptr)) return -EFAULT; new.tv_sec = value; - new.tv_usec = 0; + new.tv_nsec = 0; do_settimeofday(&new); return 0; } @@ -125,9 +168,11 @@ void __const_udelay(um_udelay_t usecs) void timer_handler(int sig, union uml_pt_regs *regs) { #ifdef CONFIG_SMP + local_irq_disable(); update_process_times(user_context(UPT_SP(regs))); + local_irq_enable(); #endif - if(current->thread_info->cpu == 0) + if(current_thread->cpu == 0) timer_irq(regs); } @@ -136,6 +181,7 @@ static spinlock_t timer_spinlock = SPIN_ unsigned long time_lock(void) { unsigned long flags; + spin_lock_irqsave(&timer_spinlock, flags); return(flags); } @@ -150,8 +196,8 @@ int __init timer_init(void) int err; CHOOSE_MODE(user_time_init_tt(), user_time_init_skas()); - if((err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", - NULL)) != 0) + err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", NULL); + if(err != 0) printk(KERN_ERR "timer_init : request_irq failed - " "errno = %d\n", -err); timer_irq_inited = 1; @@ -160,7 +206,6 @@ int __init timer_init(void) __initcall(timer_init); - /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- linux-2.6.3-rc2/arch/um/kernel/trap_kern.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/kernel/trap_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -16,12 +16,15 @@ #include "asm/tlbflush.h" #include "asm/a.out.h" #include "asm/current.h" +#include "asm/irq.h" #include "user_util.h" #include "kern_util.h" #include "kern.h" #include "chan_kern.h" #include "mconsole_kern.h" #include "2_5compat.h" +#include "mem.h" +#include "mem_kern.h" int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out) @@ -51,12 +54,12 @@ int handle_page_fault(unsigned long addr if(is_write && !(vma->vm_flags & VM_WRITE)) goto out; page = address & PAGE_MASK; - if(page == (unsigned long) current->thread_info + PAGE_SIZE) + if(page == (unsigned long) current_thread + PAGE_SIZE) panic("Kernel stack overflow"); pgd = pgd_offset(mm, page); pmd = pmd_offset(pgd, page); - survive: do { + survive: switch (handle_mm_fault(mm, vma, address, is_write)){ case VM_FAULT_MINOR: current->min_flt++; @@ -71,14 +74,20 @@ int handle_page_fault(unsigned long addr err = -ENOMEM; goto out_of_memory; default: - BUG(); + if (current->pid == 1) { + up_read(&mm->mmap_sem); + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + goto out; } pte = pte_offset_kernel(pmd, page); } while(!pte_present(*pte)); + err = 0; *pte = pte_mkyoung(*pte); if(pte_write(*pte)) *pte = pte_mkdirty(*pte); flush_tlb_page(vma, page); - err = 0; out: up_read(&mm->mmap_sem); return(err); @@ -98,6 +107,33 @@ out_of_memory: goto out; } +LIST_HEAD(physmem_remappers); + +void register_remapper(struct remapper *info) +{ + list_add(&info->list, &physmem_remappers); +} + +static int check_remapped_addr(unsigned long address, int is_write) +{ + struct remapper *remapper; + struct list_head *ele; + __u64 offset; + int fd; + + fd = phys_mapping(__pa(address), &offset); + if(fd == -1) + return(0); + + list_for_each(ele, &physmem_remappers){ + remapper = list_entry(ele, struct remapper, list); + if((*remapper->proc)(fd, address, is_write, offset)) + return(1); + } + + return(0); +} + unsigned long segv(unsigned long address, unsigned long ip, int is_write, int is_user, void *sc) { @@ -109,7 +145,9 @@ unsigned long segv(unsigned long address flush_tlb_kernel_vm(); return(0); } - if(current->mm == NULL) + else if(check_remapped_addr(address & PAGE_MASK, is_write)) + return(0); + else if(current->mm == NULL) panic("Segfault with no mm"); err = handle_page_fault(address, ip, is_write, is_user, &si.si_code); @@ -120,9 +158,8 @@ unsigned long segv(unsigned long address current->thread.fault_addr = (void *) address; do_longjmp(catcher, 1); } - else if(current->thread.fault_addr != NULL){ + else if(current->thread.fault_addr != NULL) panic("fault_addr set but no fault catcher"); - } else if(arch_fixup(ip, sc)) return(0); @@ -155,8 +192,6 @@ void bad_segv(unsigned long address, uns { struct siginfo si; - printk(KERN_ERR "Unfixable SEGV in '%s' (pid %d) at 0x%lx " - "(ip 0x%lx)\n", current->comm, current->pid, address, ip); si.si_signo = SIGSEGV; si.si_code = SEGV_ACCERR; si.si_addr = (void *) address; @@ -180,6 +215,11 @@ void bus_handler(int sig, union uml_pt_r else relay_signal(sig, regs); } +void winch(int sig, union uml_pt_regs *regs) +{ + do_IRQ(WINCH_IRQ, regs); +} + void trap_init(void) { } --- linux-2.6.3-rc2/arch/um/kernel/trap_user.c 2003-06-14 12:18:09.000000000 -0700 +++ 25/arch/um/kernel/trap_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -5,11 +5,9 @@ #include #include -#include #include #include #include -#include #include #include #include @@ -82,6 +80,8 @@ struct signal_info sig_info[] = { .is_irq = 0 }, [ SIGILL ] { .handler = relay_signal, .is_irq = 0 }, + [ SIGWINCH ] { .handler = winch, + .is_irq = 1 }, [ SIGBUS ] { .handler = bus_handler, .is_irq = 0 }, [ SIGSEGV] { .handler = segv_handler, @@ -123,7 +123,7 @@ void do_longjmp(void *b, int val) { jmp_buf *buf = b; - longjmp(*buf, val); + siglongjmp(*buf, val); } /* --- linux-2.6.3-rc2/arch/um/kernel/tt/exec_kern.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/kernel/tt/exec_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -17,6 +17,7 @@ #include "mem_user.h" #include "os.h" #include "tlb.h" +#include "mode.h" static int exec_tramp(void *sig_stack) { @@ -47,17 +48,17 @@ void flush_thread_tt(void) do_exit(SIGKILL); } - if(current->thread_info->cpu == 0) + if(current_thread->cpu == 0) forward_interrupts(new_pid); current->thread.request.op = OP_EXEC; current->thread.request.u.exec.pid = new_pid; - unprotect_stack((unsigned long) current->thread_info); + unprotect_stack((unsigned long) current_thread); os_usr1_process(os_getpid()); enable_timer(); free_page(stack); protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 1, 0, 1); - task_protections((unsigned long) current->thread_info); + task_protections((unsigned long) current_thread); force_flush_all(); unblock_signals(); } --- linux-2.6.3-rc2/arch/um/kernel/tt/include/mode.h 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/um/kernel/tt/include/mode.h 2004-02-12 00:40:26.000000000 -0800 @@ -8,6 +8,8 @@ #include "sysdep/ptrace.h" +enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; + extern int tracing_pid; extern int tracer(int (*init_proc)(void *), void *sp); --- linux-2.6.3-rc2/arch/um/kernel/tt/include/uaccess.h 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/um/kernel/tt/include/uaccess.h 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -43,65 +43,19 @@ extern unsigned long get_fault_addr(void extern int __do_copy_from_user(void *to, const void *from, int n, void **fault_addr, void **fault_catcher); - -static inline int copy_from_user_tt(void *to, const void *from, int n) -{ - return(access_ok_tt(VERIFY_READ, from, n) ? - __do_copy_from_user(to, from, n, - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher) : n); -} - -static inline int copy_to_user_tt(void *to, const void *from, int n) -{ - return(access_ok_tt(VERIFY_WRITE, to, n) ? - __do_copy_to_user(to, from, n, - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher) : n); -} - extern int __do_strncpy_from_user(char *dst, const char *src, size_t n, void **fault_addr, void **fault_catcher); - -static inline int strncpy_from_user_tt(char *dst, const char *src, int count) -{ - int n; - - if(!access_ok_tt(VERIFY_READ, src, 1)) return(-EFAULT); - n = __do_strncpy_from_user(dst, src, count, - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher); - if(n < 0) return(-EFAULT); - return(n); -} - extern int __do_clear_user(void *mem, size_t len, void **fault_addr, void **fault_catcher); - -static inline int __clear_user_tt(void *mem, int len) -{ - return(__do_clear_user(mem, len, - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher)); -} - -static inline int clear_user_tt(void *mem, int len) -{ - return(access_ok_tt(VERIFY_WRITE, mem, len) ? - __do_clear_user(mem, len, - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher) : len); -} - extern int __do_strnlen_user(const char *str, unsigned long n, void **fault_addr, void **fault_catcher); -static inline int strnlen_user_tt(const void *str, int len) -{ - return(__do_strnlen_user(str, len, - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher)); -} +extern int copy_from_user_tt(void *to, const void *from, int n); +extern int copy_to_user_tt(void *to, const void *from, int n); +extern int strncpy_from_user_tt(char *dst, const char *src, int count); +extern int __clear_user_tt(void *mem, int len); +extern int clear_user_tt(void *mem, int len); +extern int strnlen_user_tt(const void *str, int len); #endif --- linux-2.6.3-rc2/arch/um/kernel/tt/Makefile 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/um/kernel/tt/Makefile 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,5 @@ # -# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) +# Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) # Licensed under the GPL # @@ -7,7 +7,7 @@ extra-y := unmap_fin.o obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \ syscall_kern.o syscall_user.o time.o tlb.o tracer.o trap_user.o \ - uaccess_user.o sys-$(SUBARCH)/ + uaccess.o uaccess_user.o sys-$(SUBARCH)/ obj-$(CONFIG_PT_PROXY) += gdb_kern.o ptproxy/ --- linux-2.6.3-rc2/arch/um/kernel/tt/mem_user.c 2003-06-14 12:18:05.000000000 -0700 +++ 25/arch/um/kernel/tt/mem_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -25,14 +25,13 @@ void remap_data(void *segment_start, voi size = (unsigned long) segment_end - (unsigned long) segment_start; data = create_mem_file(size); - if((addr = mmap(NULL, size, PROT_WRITE | PROT_READ, - MAP_SHARED, data, 0)) == MAP_FAILED){ + addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, data, 0); + if(addr == MAP_FAILED){ perror("mapping new data segment"); exit(1); } memcpy(addr, segment_start, size); - if(switcheroo(data, prot, addr, segment_start, - size) < 0){ + if(switcheroo(data, prot, addr, segment_start, size) < 0){ printf("switcheroo failed\n"); exit(1); } --- linux-2.6.3-rc2/arch/um/kernel/tt/process_kern.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/um/kernel/tt/process_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -62,7 +62,7 @@ void *switch_to_tt(void *prev, void *nex reading = 0; err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c)); if(err != sizeof(c)) - panic("write of switch_pipe failed, errno = %d", -err); + panic("write of switch_pipe failed, err = %d", -err); reading = 1; if((from->state == TASK_ZOMBIE) || (from->state == TASK_DEAD)) @@ -104,48 +104,72 @@ void *switch_to_tt(void *prev, void *nex void release_thread_tt(struct task_struct *task) { - os_kill_process(task->thread.mode.tt.extern_pid, 0); + int pid = task->thread.mode.tt.extern_pid; + + if(os_getpid() != pid) + os_kill_process(pid, 0); } void exit_thread_tt(void) { - close(current->thread.mode.tt.switch_pipe[0]); - close(current->thread.mode.tt.switch_pipe[1]); + os_close_file(current->thread.mode.tt.switch_pipe[0]); + os_close_file(current->thread.mode.tt.switch_pipe[1]); } void schedule_tail(task_t *prev); static void new_thread_handler(int sig) { + unsigned long disable; int (*fn)(void *); void *arg; fn = current->thread.request.u.thread.proc; arg = current->thread.request.u.thread.arg; + UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1); + disable = (1 << (SIGVTALRM - 1)) | (1 << (SIGALRM - 1)) | + (1 << (SIGIO - 1)) | (1 << (SIGPROF - 1)); + SC_SIGMASK(UPT_SC(¤t->thread.regs.regs)) &= ~disable; + suspend_new_thread(current->thread.mode.tt.switch_pipe[0]); - block_signals(); + force_flush_all(); + if(current->thread.prev_sched != NULL) + schedule_tail(current->thread.prev_sched); + current->thread.prev_sched = NULL; + init_new_thread_signals(1); -#ifdef CONFIG_SMP - schedule_tail(current->thread.prev_sched); -#endif enable_timer(); free_page(current->thread.temp_stack); set_cmdline("(kernel thread)"); - force_flush_all(); - current->thread.prev_sched = NULL; change_sig(SIGUSR1, 1); change_sig(SIGVTALRM, 1); change_sig(SIGPROF, 1); - unblock_signals(); + local_irq_enable(); if(!run_kernel_thread(fn, arg, ¤t->thread.exec_buf)) do_exit(0); } static int new_thread_proc(void *stack) { + /* local_irq_disable is needed to block out signals until this thread is + * properly scheduled. Otherwise, the tracing thread will get mighty + * upset about any signals that arrive before that. + * This has the complication that it sets the saved signal mask in + * the sigcontext to block signals. This gets restored when this + * thread (or a descendant, since they get a copy of this sigcontext) + * returns to userspace. + * So, this is compensated for elsewhere. + * XXX There is still a small window until local_irq_disable() actually + * finishes where signals are possible - shouldn't be a problem in + * practice since SIGIO hasn't been forwarded here yet, and the + * local_irq_disable should finish before a SIGVTALRM has time to be + * delivered. + */ + + local_irq_disable(); init_new_thread_stack(stack, new_thread_handler); os_usr1_process(os_getpid()); return(0); @@ -156,7 +180,7 @@ static int new_thread_proc(void *stack) * itself with a SIGUSR1. set_user_mode has to be run with SIGUSR1 off, * so it is blocked before it's called. They are re-enabled on sigreturn * despite the fact that they were blocked when the SIGUSR1 was issued because - * copy_thread copies the parent's signcontext, including the signal mask + * copy_thread copies the parent's sigcontext, including the signal mask * onto the signal frame. */ @@ -165,35 +189,32 @@ void finish_fork_handler(int sig) UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1); suspend_new_thread(current->thread.mode.tt.switch_pipe[0]); -#ifdef CONFIG_SMP - schedule_tail(NULL); -#endif + force_flush_all(); + if(current->thread.prev_sched != NULL) + schedule_tail(current->thread.prev_sched); + current->thread.prev_sched = NULL; + enable_timer(); change_sig(SIGVTALRM, 1); local_irq_enable(); - force_flush_all(); if(current->mm != current->parent->mm) protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 1, 0, 1); - task_protections((unsigned long) current->thread_info); - - current->thread.prev_sched = NULL; + task_protections((unsigned long) current_thread); free_page(current->thread.temp_stack); + local_irq_disable(); change_sig(SIGUSR1, 0); set_user_mode(current); } -static int sigusr1 = SIGUSR1; - int fork_tramp(void *stack) { - int sig = sigusr1; - local_irq_disable(); + arch_init_thread(); init_new_thread_stack(stack, finish_fork_handler); - kill(os_getpid(), sig); + os_usr1_process(os_getpid()); return(0); } @@ -213,8 +234,8 @@ int copy_thread_tt(int nr, unsigned long } err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1); - if(err){ - printk("copy_thread : pipe failed, errno = %d\n", -err); + if(err < 0){ + printk("copy_thread : pipe failed, err = %d\n", -err); return(err); } @@ -377,8 +398,8 @@ static void mprotect_kernel_mem(int w) pages = (1 << CONFIG_KERNEL_STACK_ORDER); - start = (unsigned long) current->thread_info + PAGE_SIZE; - end = (unsigned long) current + PAGE_SIZE * pages; + start = (unsigned long) current_thread + PAGE_SIZE; + end = (unsigned long) current_thread + PAGE_SIZE * pages; protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1); protect_memory(end, high_physmem - end, 1, w, 1, 1); @@ -454,8 +475,9 @@ void set_init_pid(int pid) init_task.thread.mode.tt.extern_pid = pid; err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1); - if(err) panic("Can't create switch pipe for init_task, errno = %d", - err); + if(err) + panic("Can't create switch pipe for init_task, errno = %d", + -err); } int singlestepping_tt(void *t) --- linux-2.6.3-rc2/arch/um/kernel/tt/ptproxy/proxy.c 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/um/kernel/tt/ptproxy/proxy.c 2004-02-12 00:40:26.000000000 -0800 @@ -15,7 +15,6 @@ Jeff Dike (jdike@karaya.com) : Modified #include #include #include -#include #include #include #include @@ -293,10 +292,10 @@ void fake_child_exit(void) } char gdb_init_string[] = -"att 1 -b panic -b stop -handle SIGWINCH nostop noprint pass +"att 1 \n\ +b panic \n\ +b stop \n\ +handle SIGWINCH nostop noprint pass \n\ "; int start_debugger(char *prog, int startup, int stop, int *fd_out) @@ -304,7 +303,8 @@ int start_debugger(char *prog, int start int slave, child; slave = open_gdb_chan(); - if((child = fork()) == 0){ + child = fork(); + if(child == 0){ char *tempname = NULL; int fd; @@ -327,18 +327,19 @@ int start_debugger(char *prog, int start exit(1); #endif } - if((fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0)) < 0){ - printk("start_debugger : make_tempfile failed, errno = %d\n", - errno); + fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0); + if(fd < 0){ + printk("start_debugger : make_tempfile failed," + "err = %d\n", -fd); exit(1); } - write(fd, gdb_init_string, sizeof(gdb_init_string) - 1); + os_write_file(fd, gdb_init_string, sizeof(gdb_init_string) - 1); if(startup){ if(stop){ - write(fd, "b start_kernel\n", + os_write_file(fd, "b start_kernel\n", strlen("b start_kernel\n")); } - write(fd, "c\n", strlen("c\n")); + os_write_file(fd, "c\n", strlen("c\n")); } if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ printk("start_debugger : PTRACE_TRACEME failed, " --- linux-2.6.3-rc2/arch/um/kernel/tt/ptproxy/sysdep.c 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/um/kernel/tt/ptproxy/sysdep.c 2004-02-12 00:40:26.000000000 -0800 @@ -9,6 +9,7 @@ terms and conditions. #include #include #include +#include #include #include #include --- linux-2.6.3-rc2/arch/um/kernel/tt/ptproxy/wait.c 2003-06-14 12:18:32.000000000 -0700 +++ 25/arch/um/kernel/tt/ptproxy/wait.c 2004-02-12 00:40:26.000000000 -0800 @@ -56,21 +56,23 @@ int parent_wait_return(struct debugger * int real_wait_return(struct debugger *debugger) { unsigned long ip; - int err, pid; + int pid; pid = debugger->pid; + ip = ptrace(PTRACE_PEEKUSER, pid, PT_IP_OFFSET, 0); ip = IP_RESTART_SYSCALL(ip); - err = ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip); + if(ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip) < 0) tracer_panic("real_wait_return : Failed to restart system " - "call, errno = %d\n"); + "call, errno = %d\n", errno); + if((ptrace(PTRACE_SYSCALL, debugger->pid, 0, SIGCHLD) < 0) || (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || debugger_normal_return(debugger, -1)) tracer_panic("real_wait_return : gdb failed to wait, " - "errno = %d\n"); + "errno = %d\n", errno); return(0); } --- linux-2.6.3-rc2/arch/um/kernel/tt/syscall_kern.c 2003-06-14 12:17:56.000000000 -0700 +++ 25/arch/um/kernel/tt/syscall_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ --- linux-2.6.3-rc2/arch/um/kernel/tt/tracer.c 2003-06-14 12:17:56.000000000 -0700 +++ 25/arch/um/kernel/tt/tracer.c 2004-02-12 00:40:26.000000000 -0800 @@ -39,16 +39,17 @@ int is_tracer_winch(int pid, int fd, voi return(0); register_winch_irq(tracer_winch[0], fd, -1, data); - return(0); + return(1); } static void tracer_winch_handler(int sig) { + int n; char c = 1; - if(write(tracer_winch[1], &c, sizeof(c)) != sizeof(c)) - printk("tracer_winch_handler - write failed, errno = %d\n", - errno); + n = os_write_file(tracer_winch[1], &c, sizeof(c)); + if(n != sizeof(c)) + printk("tracer_winch_handler - write failed, err = %d\n", -n); } /* Called only by the tracing thread during initialization */ @@ -58,9 +59,8 @@ static void setup_tracer_winch(void) int err; err = os_pipe(tracer_winch, 1, 1); - if(err){ - printk("setup_tracer_winch : os_pipe failed, errno = %d\n", - -err); + if(err < 0){ + printk("setup_tracer_winch : os_pipe failed, err = %d\n", -err); return; } signal(SIGWINCH, tracer_winch_handler); @@ -130,8 +130,8 @@ static void sleeping_process_signal(int case SIGTSTP: if(ptrace(PTRACE_CONT, pid, 0, sig) < 0) tracer_panic("sleeping_process_signal : Failed to " - "continue pid %d, errno = %d\n", pid, - sig); + "continue pid %d, signal = %d, " + "errno = %d\n", pid, sig, errno); break; /* This happens when the debugger (e.g. strace) is doing system call @@ -145,7 +145,7 @@ static void sleeping_process_signal(int if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) tracer_panic("sleeping_process_signal : Failed to " "PTRACE_SYSCALL pid %d, errno = %d\n", - pid, sig); + pid, errno); break; case SIGSTOP: break; @@ -218,7 +218,7 @@ int tracer(int (*init_proc)(void *), voi err = attach(debugger_parent); if(err){ printf("Failed to attach debugger parent %d, " - "errno = %d\n", debugger_parent, err); + "errno = %d\n", debugger_parent, -err); debugger_parent = -1; } else { @@ -233,7 +233,8 @@ int tracer(int (*init_proc)(void *), voi } set_cmdline("(tracing thread)"); while(1){ - if((pid = waitpid(-1, &status, WUNTRACED)) <= 0){ + pid = waitpid(-1, &status, WUNTRACED); + if(pid <= 0){ if(errno != ECHILD){ printf("wait failed - errno = %d\n", errno); } @@ -401,7 +402,7 @@ static int __init uml_debug_setup(char * if(!strcmp(line, "go")) debug_stop = 0; else if(!strcmp(line, "parent")) debug_parent = 1; - else printk("Unknown debug option : '%s'\n", line); + else printf("Unknown debug option : '%s'\n", line); line = next; } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/kernel/tt/uaccess.c 2004-02-12 00:40:26.000000000 -0800 @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/sched.h" +#include "asm/uaccess.h" + +int copy_from_user_tt(void *to, const void *from, int n) +{ + if(!access_ok_tt(VERIFY_READ, from, n)) + return(n); + + return(__do_copy_from_user(to, from, n, ¤t->thread.fault_addr, + ¤t->thread.fault_catcher)); +} + +int copy_to_user_tt(void *to, const void *from, int n) +{ + if(!access_ok_tt(VERIFY_WRITE, to, n)) + return(n); + + return(__do_copy_to_user(to, from, n, ¤t->thread.fault_addr, + ¤t->thread.fault_catcher)); +} + +int strncpy_from_user_tt(char *dst, const char *src, int count) +{ + int n; + + if(!access_ok_tt(VERIFY_READ, src, 1)) + return(-EFAULT); + + n = __do_strncpy_from_user(dst, src, count, + ¤t->thread.fault_addr, + ¤t->thread.fault_catcher); + if(n < 0) return(-EFAULT); + return(n); +} + +int __clear_user_tt(void *mem, int len) +{ + return(__do_clear_user(mem, len, + ¤t->thread.fault_addr, + ¤t->thread.fault_catcher)); +} + +int clear_user_tt(void *mem, int len) +{ + if(!access_ok_tt(VERIFY_WRITE, mem, len)) + return(len); + + return(__do_clear_user(mem, len, ¤t->thread.fault_addr, + ¤t->thread.fault_catcher)); +} + +int strnlen_user_tt(const void *str, int len) +{ + return(__do_strnlen_user(str, len, + ¤t->thread.fault_addr, + ¤t->thread.fault_catcher)); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.3-rc2/arch/um/kernel/tt/uaccess_user.c 2003-06-14 12:18:06.000000000 -0700 +++ 25/arch/um/kernel/tt/uaccess_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -8,15 +8,20 @@ #include #include "user_util.h" #include "uml_uaccess.h" +#include "task.h" +#include "kern_util.h" int __do_copy_from_user(void *to, const void *from, int n, void **fault_addr, void **fault_catcher) { + struct tt_regs save = TASK_REGS(get_current())->tt; unsigned long fault; int faulted; fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, __do_copy, &faulted); + TASK_REGS(get_current())->tt = save; + if(!faulted) return(0); else return(n - (fault - (unsigned long) from)); } @@ -29,11 +34,14 @@ static void __do_strncpy(void *dst, cons int __do_strncpy_from_user(char *dst, const char *src, unsigned long count, void **fault_addr, void **fault_catcher) { + struct tt_regs save = TASK_REGS(get_current())->tt; unsigned long fault; int faulted; fault = __do_user_copy(dst, src, count, fault_addr, fault_catcher, __do_strncpy, &faulted); + TASK_REGS(get_current())->tt = save; + if(!faulted) return(strlen(dst)); else return(-1); } @@ -46,11 +54,14 @@ static void __do_clear(void *to, const v int __do_clear_user(void *mem, unsigned long len, void **fault_addr, void **fault_catcher) { + struct tt_regs save = TASK_REGS(get_current())->tt; unsigned long fault; int faulted; fault = __do_user_copy(mem, NULL, len, fault_addr, fault_catcher, __do_clear, &faulted); + TASK_REGS(get_current())->tt = save; + if(!faulted) return(0); else return(len - (fault - (unsigned long) mem)); } @@ -58,19 +69,20 @@ int __do_clear_user(void *mem, unsigned int __do_strnlen_user(const char *str, unsigned long n, void **fault_addr, void **fault_catcher) { + struct tt_regs save = TASK_REGS(get_current())->tt; int ret; unsigned long *faddrp = (unsigned long *)fault_addr; jmp_buf jbuf; *fault_catcher = &jbuf; - if(setjmp(jbuf) == 0){ + if(sigsetjmp(jbuf, 1) == 0) ret = strlen(str) + 1; - } - else { - ret = *faddrp - (unsigned long) str; - } + else ret = *faddrp - (unsigned long) str; + *fault_addr = NULL; *fault_catcher = NULL; + + TASK_REGS(get_current())->tt = save; return ret; } --- linux-2.6.3-rc2/arch/um/kernel/tt/unmap.c 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/um/kernel/tt/unmap.c 2004-02-12 00:40:26.000000000 -0800 @@ -3,10 +3,7 @@ * Licensed under the GPL */ -#include -#include #include -#include "user.h" int switcheroo(int fd, int prot, void *from, void *to, int size) { --- linux-2.6.3-rc2/arch/um/kernel/tty_log.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/um/kernel/tty_log.c 2004-02-12 00:40:26.000000000 -0800 @@ -9,10 +9,10 @@ #include #include #include -#include #include #include "init.h" #include "user.h" +#include "kern_util.h" #include "os.h" #define TTY_LOG_DIR "./" @@ -24,29 +24,40 @@ static int tty_log_fd = -1; #define TTY_LOG_OPEN 1 #define TTY_LOG_CLOSE 2 #define TTY_LOG_WRITE 3 +#define TTY_LOG_EXEC 4 + +#define TTY_READ 1 +#define TTY_WRITE 2 struct tty_log_buf { int what; unsigned long tty; int len; + int direction; + unsigned long sec; + unsigned long usec; }; -int open_tty_log(void *tty) +int open_tty_log(void *tty, void *current_tty) { struct timeval tv; struct tty_log_buf data; char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")]; int fd; + gettimeofday(&tv, NULL); if(tty_log_fd != -1){ - data = ((struct tty_log_buf) { what : TTY_LOG_OPEN, - tty : (unsigned long) tty, - len : 0 }); - write(tty_log_fd, &data, sizeof(data)); + data = ((struct tty_log_buf) { .what = TTY_LOG_OPEN, + .tty = (unsigned long) tty, + .len = sizeof(current_tty), + .direction = 0, + .sec = tv.tv_sec, + .usec = tv.tv_usec } ); + os_write_file(tty_log_fd, &data, sizeof(data)); + os_write_file(tty_log_fd, ¤t_tty, data.len); return(tty_log_fd); } - gettimeofday(&tv, NULL); sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec, (unsigned int) tv.tv_usec); @@ -62,30 +73,117 @@ int open_tty_log(void *tty) void close_tty_log(int fd, void *tty) { struct tty_log_buf data; + struct timeval tv; if(tty_log_fd != -1){ - data = ((struct tty_log_buf) { what : TTY_LOG_CLOSE, - tty : (unsigned long) tty, - len : 0 }); - write(tty_log_fd, &data, sizeof(data)); + gettimeofday(&tv, NULL); + data = ((struct tty_log_buf) { .what = TTY_LOG_CLOSE, + .tty = (unsigned long) tty, + .len = 0, + .direction = 0, + .sec = tv.tv_sec, + .usec = tv.tv_usec } ); + os_write_file(tty_log_fd, &data, sizeof(data)); return; } - close(fd); + os_close_file(fd); } -int write_tty_log(int fd, char *buf, int len, void *tty) +static int log_chunk(int fd, const char *buf, int len) { + int total = 0, try, missed, n; + char chunk[64]; + + while(len > 0){ + try = (len > sizeof(chunk)) ? sizeof(chunk) : len; + missed = copy_from_user_proc(chunk, (char *) buf, try); + try -= missed; + n = os_write_file(fd, chunk, try); + if(n != try) { + if(n < 0) + return(n); + return(-EIO); + } + if(missed != 0) + return(-EFAULT); + + len -= try; + total += try; + buf += try; + } + + return(total); +} + +int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read) +{ + struct timeval tv; struct tty_log_buf data; + int direction; if(fd == tty_log_fd){ - data = ((struct tty_log_buf) { what : TTY_LOG_WRITE, - tty : (unsigned long) tty, - len : len }); - write(tty_log_fd, &data, sizeof(data)); + gettimeofday(&tv, NULL); + direction = is_read ? TTY_READ : TTY_WRITE; + data = ((struct tty_log_buf) { .what = TTY_LOG_WRITE, + .tty = (unsigned long) tty, + .len = len, + .direction = direction, + .sec = tv.tv_sec, + .usec = tv.tv_usec } ); + os_write_file(tty_log_fd, &data, sizeof(data)); } - return(write(fd, buf, len)); + + return(log_chunk(fd, buf, len)); } +void log_exec(char **argv, void *tty) +{ + struct timeval tv; + struct tty_log_buf data; + char **ptr,*arg; + int len; + + if(tty_log_fd == -1) return; + + gettimeofday(&tv, NULL); + + len = 0; + for(ptr = argv; ; ptr++){ + if(copy_from_user_proc(&arg, ptr, sizeof(arg))) + return; + if(arg == NULL) break; + len += strlen_user_proc(arg); + } + + data = ((struct tty_log_buf) { .what = TTY_LOG_EXEC, + .tty = (unsigned long) tty, + .len = len, + .direction = 0, + .sec = tv.tv_sec, + .usec = tv.tv_usec } ); + os_write_file(tty_log_fd, &data, sizeof(data)); + + for(ptr = argv; ; ptr++){ + if(copy_from_user_proc(&arg, ptr, sizeof(arg))) + return; + if(arg == NULL) break; + log_chunk(tty_log_fd, arg, strlen_user_proc(arg)); + } +} + +extern void register_tty_logger(int (*opener)(void *, void *), + int (*writer)(int, const char *, int, + void *, int), + void (*closer)(int, void *)); + +static int register_logger(void) +{ + register_tty_logger(open_tty_log, write_tty_log, close_tty_log); + return(0); +} + +__uml_initcall(register_logger); + static int __init set_tty_log_dir(char *name, int *add) { tty_log_dir = name; @@ -104,7 +202,7 @@ static int __init set_tty_log_fd(char *n tty_log_fd = strtoul(name, &end, 0); if((*end != '\0') || (end == name)){ - printk("set_tty_log_fd - strtoul failed on '%s'\n", name); + printf("set_tty_log_fd - strtoul failed on '%s'\n", name); tty_log_fd = -1; } return 0; --- linux-2.6.3-rc2/arch/um/kernel/uaccess_user.c 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/kernel/uaccess_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -20,7 +20,7 @@ unsigned long __do_user_copy(void *to, c jmp_buf jbuf; *fault_catcher = &jbuf; - if(setjmp(jbuf) == 0){ + if(sigsetjmp(jbuf, 1) == 0){ (*op)(to, from, n); ret = 0; *faulted_out = 0; --- linux-2.6.3-rc2/arch/um/kernel/um_arch.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/um/kernel/um_arch.c 2004-02-12 00:40:29.000000000 -0800 @@ -38,13 +38,18 @@ #include "mode_kern.h" #include "mode.h" -#define DEFAULT_COMMAND_LINE "root=6200" +#define DEFAULT_COMMAND_LINE "root=98:0" struct cpuinfo_um boot_cpu_data = { .loops_per_jiffy = 0, .ipi_pipe = { -1, -1 } }; +/* Placeholder to make UML link until the vsyscall stuff is actually + * implemented + */ +void *__kernel_vsyscall; + unsigned long thread_saved_pc(struct task_struct *task) { return(os_process_pc(CHOOSE_MODE_PROC(thread_pid_tt, thread_pid_skas, @@ -53,18 +58,22 @@ unsigned long thread_saved_pc(struct tas static int show_cpuinfo(struct seq_file *m, void *v) { - int index; + int index = 0; - index = (struct cpuinfo_um *)v - cpu_data; #ifdef CONFIG_SMP + index = (struct cpuinfo_um *) v - cpu_data; if (!cpu_online(index)) return 0; #endif - seq_printf(m, "bogomips\t: %lu.%02lu\n", + seq_printf(m, "processor\t: %d\n", index); + seq_printf(m, "vendor_id\t: User Mode Linux\n"); + seq_printf(m, "model name\t: UML\n"); + seq_printf(m, "mode\t\t: %s\n", CHOOSE_MODE("tt", "skas")); + seq_printf(m, "host\t\t: %s\n", host_info); + seq_printf(m, "bogomips\t: %lu.%02lu\n\n", loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); - seq_printf(m, "host\t\t: %s\n", host_info); return(0); } @@ -134,12 +143,12 @@ void set_cmdline(char *cmd) if(umid != NULL){ snprintf(argv1_begin, (argv1_end - argv1_begin) * sizeof(*ptr), - "(%s)", umid); + "(%s) ", umid); ptr = &argv1_begin[strlen(argv1_begin)]; } else ptr = argv1_begin; - snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), " [%s]", cmd); + snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), "[%s]", cmd); memset(argv1_begin + strlen(argv1_begin), '\0', argv1_end - argv1_begin - strlen(argv1_begin)); #endif @@ -179,7 +188,7 @@ __uml_setup("root=", uml_root_setup, static int __init uml_ncpus_setup(char *line, int *add) { if (!sscanf(line, "%d", &ncpus)) { - printk("Couldn't parse [%s]\n", line); + printf("Couldn't parse [%s]\n", line); return -1; } @@ -210,7 +219,7 @@ static int __init mode_tt_setup(char *li static int __init mode_tt_setup(char *line, int *add) { - printk("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n"); + printf("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n"); return(0); } @@ -221,7 +230,7 @@ static int __init mode_tt_setup(char *li static int __init mode_tt_setup(char *line, int *add) { - printk("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n"); + printf("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n"); return(0); } @@ -291,7 +300,7 @@ static void __init uml_postsetup(void) /* Set during early boot */ unsigned long brk_start; -static struct vm_reserved kernel_vm_reserved; +unsigned long end_iomem; #define MIN_VMALLOC (32 * 1024 * 1024) @@ -299,7 +308,7 @@ int linux_main(int argc, char **argv) { unsigned long avail; unsigned long virtmem_size, max_physmem; - unsigned int i, add, err; + unsigned int i, add; for (i = 1; i < argc; i++){ if((i == 1) && (argv[i][0] == ' ')) continue; @@ -328,12 +337,16 @@ int linux_main(int argc, char **argv) argv1_end = &argv[1][strlen(argv[1])]; #endif - set_usable_vm(uml_physmem, get_kmem_end()); - highmem = 0; - max_physmem = get_kmem_end() - uml_physmem - MIN_VMALLOC; - if(physmem_size > max_physmem){ - highmem = physmem_size - max_physmem; + iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; + max_physmem = get_kmem_end() - uml_physmem - iomem_size - MIN_VMALLOC; + + /* Zones have to begin on a 1 << MAX_ORDER page boundary, + * so this makes sure that's true for highmem + */ + max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1); + if(physmem_size + iomem_size > max_physmem){ + highmem = physmem_size + iomem_size - max_physmem; physmem_size -= highmem; #ifndef CONFIG_HIGHMEM highmem = 0; @@ -343,11 +356,19 @@ int linux_main(int argc, char **argv) } high_physmem = uml_physmem + physmem_size; - high_memory = (void *) high_physmem; + end_iomem = high_physmem + iomem_size; + high_memory = (void *) end_iomem; start_vm = VMALLOC_START; - setup_physmem(uml_physmem, uml_reserved, physmem_size); + setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); + if(init_maps(physmem_size, iomem_size, highmem)){ + printf("Failed to allocate mem_map for %ld bytes of physical " + "memory and %ld bytes of highmem\n", physmem_size, + highmem); + exit(1); + } + virtmem_size = physmem_size; avail = get_kmem_end() - start_vm; if(physmem_size > avail) virtmem_size = avail; @@ -357,18 +378,13 @@ int linux_main(int argc, char **argv) printf("Kernel virtual memory size shrunk to %ld bytes\n", virtmem_size); - err = reserve_vm(high_physmem, end_vm, &kernel_vm_reserved); - if(err){ - printf("Failed to reserve VM area for kernel VM\n"); - exit(1); - } - uml_postsetup(); init_task.thread.kernel_stack = (unsigned long) &init_thread_info + 2 * PAGE_SIZE; task_protections((unsigned long) &init_thread_info); + os_flush_stdout(); return(CHOOSE_MODE(start_uml_tt(), start_uml_skas())); } @@ -377,7 +393,7 @@ static int panic_exit(struct notifier_bl void *unused2) { #ifdef CONFIG_MAGIC_SYSRQ - handle_sysrq('p', ¤t->thread.regs, NULL, NULL); + handle_sysrq('p', ¤t->thread.regs, NULL); #endif machine_halt(); return(0); @@ -403,6 +419,7 @@ void __init check_bugs(void) arch_check_bugs(); check_ptrace(); check_sigio(); + check_devanon(); } /* --- linux-2.6.3-rc2/arch/um/kernel/umid.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/um/kernel/umid.c 2004-02-12 00:40:26.000000000 -0800 @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -33,18 +32,19 @@ static char *uml_dir = UML_DIR; static int umid_is_random = 1; static int umid_inited = 0; -static int make_umid(void); +static int make_umid(int (*printer)(const char *fmt, ...)); -static int __init set_umid(char *name, int is_random) +static int __init set_umid(char *name, int is_random, + int (*printer)(const char *fmt, ...)) { if(umid_inited){ - printk("Unique machine name can't be set twice\n"); + (*printer)("Unique machine name can't be set twice\n"); return(-1); } if(strlen(name) > UMID_LEN - 1) - printk("Unique machine name is being truncated to %s " - "characters\n", UMID_LEN); + (*printer)("Unique machine name is being truncated to %s " + "characters\n", UMID_LEN); strlcpy(umid, name, sizeof(umid)); umid_is_random = is_random; @@ -54,7 +54,7 @@ static int __init set_umid(char *name, i static int __init set_umid_arg(char *name, int *add) { - return(set_umid(name, 0)); + return(set_umid(name, 0, printf)); } __uml_setup("umid=", set_umid_arg, @@ -67,7 +67,7 @@ int __init umid_file_name(char *name, ch { int n; - if(!umid_inited && make_umid()) return(-1); + if(!umid_inited && make_umid(printk)) return(-1); n = strlen(uml_dir) + strlen(umid) + strlen(name) + 1; if(n > len){ @@ -85,22 +85,23 @@ static int __init create_pid_file(void) { char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; char pid[sizeof("nnnnn\0")]; - int fd; + int fd, n; if(umid_file_name("pid", file, sizeof(file))) return 0; fd = os_open_file(file, of_create(of_excl(of_rdwr(OPENFLAGS()))), 0644); if(fd < 0){ - printk("Open of machine pid file \"%s\" failed - " - "errno = %d\n", file, -fd); + printf("Open of machine pid file \"%s\" failed - " + "err = %d\n", file, -fd); return 0; } sprintf(pid, "%d\n", os_getpid()); - if(write(fd, pid, strlen(pid)) != strlen(pid)) - printk("Write of pid file failed - errno = %d\n", errno); - close(fd); + n = os_write_file(fd, pid, strlen(pid)); + if(n != strlen(pid)) + printf("Write of pid file failed - err = %d\n", -n); + os_close_file(fd); return 0; } @@ -111,7 +112,8 @@ static int actually_do_remove(char *dir) int len; char file[256]; - if((directory = opendir(dir)) == NULL){ + directory = opendir(dir); + if(directory == NULL){ printk("actually_do_remove : couldn't open directory '%s', " "errno = %d\n", dir, errno); return(1); @@ -160,22 +162,24 @@ int not_dead_yet(char *dir) { char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; char pid[sizeof("nnnnn\0")], *end; - int dead, fd, p; + int dead, fd, p, n; sprintf(file, "%s/pid", dir); dead = 0; - if((fd = os_open_file(file, of_read(OPENFLAGS()), 0)) < 0){ + fd = os_open_file(file, of_read(OPENFLAGS()), 0); + if(fd < 0){ if(fd != -ENOENT){ printk("not_dead_yet : couldn't open pid file '%s', " - "errno = %d\n", file, -fd); + "err = %d\n", file, -fd); return(1); } dead = 1; } if(fd > 0){ - if(read(fd, pid, sizeof(pid)) < 0){ + n = os_read_file(fd, pid, sizeof(pid)); + if(n < 0){ printk("not_dead_yet : couldn't read pid file '%s', " - "errno = %d\n", file, errno); + "err = %d\n", file, -n); return(1); } p = strtoul(pid, &end, 0); @@ -197,7 +201,7 @@ static int __init set_uml_dir(char *name if((strlen(name) > 0) && (name[strlen(name) - 1] != '/')){ uml_dir = malloc(strlen(name) + 1); if(uml_dir == NULL){ - printk("Failed to malloc uml_dir - error = %d\n", + printf("Failed to malloc uml_dir - error = %d\n", errno); uml_dir = name; return(0); @@ -217,7 +221,7 @@ static int __init make_uml_dir(void) char *home = getenv("HOME"); if(home == NULL){ - printk("make_uml_dir : no value in environment for " + printf("make_uml_dir : no value in environment for " "$HOME\n"); exit(1); } @@ -232,57 +236,59 @@ static int __init make_uml_dir(void) dir[len + 1] = '\0'; } - if((uml_dir = malloc(strlen(dir) + 1)) == NULL){ + uml_dir = malloc(strlen(dir) + 1); + if(uml_dir == NULL){ printf("make_uml_dir : malloc failed, errno = %d\n", errno); exit(1); } strcpy(uml_dir, dir); if((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)){ - printk("Failed to mkdir %s - errno = %i\n", uml_dir, errno); + printf("Failed to mkdir %s - errno = %i\n", uml_dir, errno); return(-1); } return 0; } -static int __init make_umid(void) +static int __init make_umid(int (*printer)(const char *fmt, ...)) { int fd, err; char tmp[strlen(uml_dir) + UMID_LEN + 1]; strlcpy(tmp, uml_dir, sizeof(tmp)); - if(*umid == 0){ + if(!umid_inited){ strcat(tmp, "XXXXXX"); fd = mkstemp(tmp); if(fd < 0){ - printk("make_umid - mkstemp failed, errno = %d\n", - errno); + (*printer)("make_umid - mkstemp failed, errno = %d\n", + errno); return(1); } - close(fd); + os_close_file(fd); /* There's a nice tiny little race between this unlink and * the mkdir below. It'd be nice if there were a mkstemp * for directories. */ unlink(tmp); - set_umid(&tmp[strlen(uml_dir)], 1); + set_umid(&tmp[strlen(uml_dir)], 1, printer); } sprintf(tmp, "%s%s", uml_dir, umid); - if((err = mkdir(tmp, 0777)) < 0){ + err = mkdir(tmp, 0777); + if(err < 0){ if(errno == EEXIST){ if(not_dead_yet(tmp)){ - printk("umid '%s' is in use\n", umid); + (*printer)("umid '%s' is in use\n", umid); return(-1); } err = mkdir(tmp, 0777); } } if(err < 0){ - printk("Failed to create %s - errno = %d\n", umid, errno); + (*printer)("Failed to create %s - errno = %d\n", umid, errno); return(-1); } @@ -295,7 +301,13 @@ __uml_setup("uml_dir=", set_uml_dir, ); __uml_postsetup(make_uml_dir); -__uml_postsetup(make_umid); + +static int __init make_umid_setup(void) +{ + return(make_umid(printf)); +} + +__uml_postsetup(make_umid_setup); __uml_postsetup(create_pid_file); /* --- linux-2.6.3-rc2/arch/um/kernel/user_syms.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/um/kernel/user_syms.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,7 +1,7 @@ #include #include -#include #include +#include #include #include #include @@ -27,7 +27,7 @@ struct module_symbol #define __MODULE_STRING_1(x) #x #define __MODULE_STRING(x) __MODULE_STRING_1(x) -#if !defined(__AUTOCONF_INCLUDED__) +#if !defined(AUTOCONF_INCLUDED) #define __EXPORT_SYMBOL(sym,str) error config_must_be_included_before_module #define EXPORT_SYMBOL(var) error config_must_be_included_before_module --- linux-2.6.3-rc2/arch/um/kernel/user_util.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/kernel/user_util.c 2004-02-12 00:40:26.000000000 -0800 @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -82,7 +81,8 @@ int wait_for_stop(int pid, int sig, int int status, ret; while(1){ - if(((ret = waitpid(pid, &status, WUNTRACED)) < 0) || + ret = waitpid(pid, &status, WUNTRACED); + if((ret < 0) || !WIFSTOPPED(status) || (WSTOPSIG(status) != sig)){ if(ret < 0){ if(errno == EINTR) continue; @@ -119,17 +119,6 @@ int wait_for_stop(int pid, int sig, int } } -int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags) -{ - int pid; - - pid = clone(fn, sp, flags, arg); - if(pid < 0) return(-1); - wait_for_stop(pid, SIGSTOP, PTRACE_CONT, NULL); - ptrace(PTRACE_CONT, pid, 0, 0); - return(pid); -} - int raw(int fd, int complain) { struct termios tt; --- linux-2.6.3-rc2/arch/um/main.c 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/um/main.c 2004-02-12 00:40:26.000000000 -0800 @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -123,12 +124,14 @@ int main(int argc, char **argv, char **e set_stklim(); - if((new_argv = malloc((argc + 1) * sizeof(char *))) == NULL){ + new_argv = malloc((argc + 1) * sizeof(char *)); + if(new_argv == NULL){ perror("Mallocing argv"); exit(1); } for(i=0;i $@ +filechk_$(ARCH_DIR)/include/task.h := $(ARCH_DIR)/util/mk_task + $(ARCH_DIR)/include/task.h : $(ARCH_DIR)/util/mk_task - $< > $@ + $(call filechk,$@) + +filechk_$(ARCH_DIR)/include/kern_constants.h := $(ARCH_DIR)/util/mk_constants $(ARCH_DIR)/include/kern_constants.h : $(ARCH_DIR)/util/mk_constants - $< > $@ + $(call filechk,$@) -$(ARCH_DIR)/util/mk_task : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h \ - $(ARCH_DIR)/util FORCE ; +$(ARCH_DIR)/util/mk_task $(ARCH_DIR)/util/mk_constants : $(ARCH_DIR)/util \ + sys_prepare FORCE ; $(ARCH_DIR)/util: FORCE - @$(call descend,$@,) + $(MAKE) -f scripts/Makefile.build obj=$@ -export SUBARCH USER_CFLAGS OS +export SUBARCH USER_CFLAGS OS --- linux-2.6.3-rc2/arch/um/Makefile-i386 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/um/Makefile-i386 2004-02-12 00:40:26.000000000 -0800 @@ -16,22 +16,28 @@ SYS_UTIL_DIR := $(ARCH_DIR)/sys-i386/uti SYS_HEADERS = $(SYS_DIR)/sc.h $(SYS_DIR)/thread.h +sys_prepare: $(SYS_DIR)/sc.h + prepare: $(SYS_HEADERS) +filechk_$(SYS_DIR)/sc.h := $(SYS_UTIL_DIR)/mk_sc + $(SYS_DIR)/sc.h: $(SYS_UTIL_DIR)/mk_sc - $< > $@ + $(call filechk,$@) + +filechk_$(SYS_DIR)/thread.h := $(SYS_UTIL_DIR)/mk_thread $(SYS_DIR)/thread.h: $(SYS_UTIL_DIR)/mk_thread - $< > $@ + $(call filechk,$@) -$(SYS_UTIL_DIR)/mk_sc: FORCE ; - @$(call descend,$(SYS_UTIL_DIR),$@) +$(SYS_UTIL_DIR)/mk_sc: scripts/fixdep include/config/MARKER FORCE ; + +@$(call descend,$(SYS_UTIL_DIR),$@) -$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) FORCE ; - @$(call descend,$(SYS_UTIL_DIR),$@) +$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) sys_prepare FORCE ; + +@$(call descend,$(SYS_UTIL_DIR),$@) $(SYS_UTIL_DIR): include/asm FORCE - @$(call descend,$@,) + +@$(call descend,$@,) sysclean : rm -f $(SYS_HEADERS) --- linux-2.6.3-rc2/arch/um/Makefile-skas 2003-06-14 12:18:09.000000000 -0700 +++ 25/arch/um/Makefile-skas 2004-02-12 00:40:26.000000000 -0800 @@ -14,7 +14,7 @@ MODE_INCLUDE += -I$(TOPDIR)/$(ARCH_DIR)/ LINK_SKAS = -Wl,-rpath,/lib LD_SCRIPT_SKAS = dyn.lds.s -GEN_HEADERS += $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h +GEN_HEADERS += $(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h -$(ARCH_DIR)/kernel/skas/include/skas_ptregs.h : - $(MAKE) -C $(ARCH_DIR)/kernel/skas include/skas_ptregs.h +$(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h : + $(call descend,$(ARCH_DIR)/kernel/skas,$@) --- linux-2.6.3-rc2/arch/um/os-Linux/drivers/ethertap_kern.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/os-Linux/drivers/ethertap_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -8,7 +8,6 @@ #include "linux/init.h" #include "linux/netdevice.h" #include "linux/etherdevice.h" -#include "linux/init.h" #include "net_kern.h" #include "net_user.h" #include "etap.h" --- linux-2.6.3-rc2/arch/um/os-Linux/drivers/ethertap_user.c 2003-06-14 12:18:21.000000000 -0700 +++ 25/arch/um/os-Linux/drivers/ethertap_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -42,13 +41,14 @@ static void etap_change(int op, unsigned { struct addr_change change; void *output; + int n; change.what = op; memcpy(change.addr, addr, sizeof(change.addr)); memcpy(change.netmask, netmask, sizeof(change.netmask)); - if(write(fd, &change, sizeof(change)) != sizeof(change)) - printk("etap_change - request failed, errno = %d\n", - errno); + n = os_write_file(fd, &change, sizeof(change)); + if(n != sizeof(change)) + printk("etap_change - request failed, err = %d\n", -n); output = um_kmalloc(page_size()); if(output == NULL) printk("etap_change : Failed to allocate output buffer\n"); @@ -82,15 +82,15 @@ static void etap_pre_exec(void *arg) struct etap_pre_exec_data *data = arg; dup2(data->control_remote, 1); - close(data->data_me); - close(data->control_me); + os_close_file(data->data_me); + os_close_file(data->control_me); } static int etap_tramp(char *dev, char *gate, int control_me, int control_remote, int data_me, int data_remote) { struct etap_pre_exec_data pe_data; - int pid, status, err; + int pid, status, err, n; char version_buf[sizeof("nnnnn\0")]; char data_fd_buf[sizeof("nnnnnn\0")]; char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; @@ -114,21 +114,21 @@ static int etap_tramp(char *dev, char *g pe_data.data_me = data_me; pid = run_helper(etap_pre_exec, &pe_data, args, NULL); - if(pid < 0) err = errno; - close(data_remote); - close(control_remote); - if(read(control_me, &c, sizeof(c)) != sizeof(c)){ - printk("etap_tramp : read of status failed, errno = %d\n", - errno); - return(EINVAL); + if(pid < 0) err = pid; + os_close_file(data_remote); + os_close_file(control_remote); + n = os_read_file(control_me, &c, sizeof(c)); + if(n != sizeof(c)){ + printk("etap_tramp : read of status failed, err = %d\n", -n); + return(-EINVAL); } if(c != 1){ printk("etap_tramp : uml_net failed\n"); - err = EINVAL; - if(waitpid(pid, &status, 0) < 0) err = errno; - else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)){ + err = -EINVAL; + if(waitpid(pid, &status, 0) < 0) + err = -errno; + else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)) printk("uml_net didn't exit with status 1\n"); - } } return(err); } @@ -143,14 +143,14 @@ static int etap_open(void *data) if(err) return(err); err = os_pipe(data_fds, 0, 0); - if(err){ - printk("data os_pipe failed - errno = %d\n", -err); + if(err < 0){ + printk("data os_pipe failed - err = %d\n", -err); return(err); } err = os_pipe(control_fds, 1, 0); - if(err){ - printk("control os_pipe failed - errno = %d\n", -err); + if(err < 0){ + printk("control os_pipe failed - err = %d\n", -err); return(err); } @@ -167,9 +167,9 @@ static int etap_open(void *data) kfree(output); } - if(err != 0){ - printk("etap_tramp failed - errno = %d\n", err); - return(-err); + if(err < 0){ + printk("etap_tramp failed - err = %d\n", -err); + return(err); } pri->data_fd = data_fds[0]; @@ -183,11 +183,11 @@ static void etap_close(int fd, void *dat struct ethertap_data *pri = data; iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); - close(fd); + os_close_file(fd); os_shutdown_socket(pri->data_fd, 1, 1); - close(pri->data_fd); + os_close_file(pri->data_fd); pri->data_fd = -1; - close(pri->control_fd); + os_close_file(pri->control_fd); pri->control_fd = -1; } --- linux-2.6.3-rc2/arch/um/os-Linux/drivers/tuntap_user.c 2003-06-14 12:18:51.000000000 -0700 +++ 25/arch/um/os-Linux/drivers/tuntap_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -61,7 +60,7 @@ static void tuntap_pre_exec(void *arg) struct tuntap_pre_exec_data *data = arg; dup2(data->stdout, 1); - close(data->close_me); + os_close_file(data->close_me); } static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, @@ -86,7 +85,7 @@ static int tuntap_open_tramp(char *gate, if(pid < 0) return(-pid); - close(remote); + os_close_file(remote); msg.msg_name = NULL; msg.msg_namelen = 0; @@ -107,19 +106,19 @@ static int tuntap_open_tramp(char *gate, if(n < 0){ printk("tuntap_open_tramp : recvmsg failed - errno = %d\n", errno); - return(errno); + return(-errno); } waitpid(pid, NULL, 0); cmsg = CMSG_FIRSTHDR(&msg); if(cmsg == NULL){ printk("tuntap_open_tramp : didn't receive a message\n"); - return(EINVAL); + return(-EINVAL); } if((cmsg->cmsg_level != SOL_SOCKET) || (cmsg->cmsg_type != SCM_RIGHTS)){ printk("tuntap_open_tramp : didn't receive a descriptor\n"); - return(EINVAL); + return(-EINVAL); } *fd_out = ((int *) CMSG_DATA(cmsg))[0]; return(0); @@ -133,27 +132,29 @@ static int tuntap_open(void *data) int err, fds[2], len, used; err = tap_open_common(pri->dev, pri->gate_addr); - if(err) return(err); + if(err < 0) + return(err); if(pri->fixed_config){ - if((pri->fd = open("/dev/net/tun", O_RDWR)) < 0){ - printk("Failed to open /dev/net/tun, errno = %d\n", - errno); - return(-errno); + pri->fd = os_open_file("/dev/net/tun", of_rdwr(OPENFLAGS()), 0); + if(pri->fd < 0){ + printk("Failed to open /dev/net/tun, err = %d\n", + -pri->fd); + return(pri->fd); } memset(&ifr, 0, sizeof(ifr)); - ifr.ifr_flags = IFF_TAP; + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name)); if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){ - printk("TUNSETIFF failed, errno = %d", errno); - close(pri->fd); + printk("TUNSETIFF failed, errno = %d\n", errno); + os_close_file(pri->fd); return(-errno); } } else { err = os_pipe(fds, 0, 0); - if(err){ - printk("tuntap_open : os_pipe failed - errno = %d\n", + if(err < 0){ + printk("tuntap_open : os_pipe failed - err = %d\n", -err); return(err); } @@ -166,19 +167,19 @@ static int tuntap_open(void *data) fds[1], buffer, len, &used); output = buffer; - if(err == 0){ - pri->dev_name = uml_strdup(buffer); - output += IFNAMSIZ; - printk(output); - free_output_buffer(buffer); - } - else { - printk(output); + if(err < 0) { + printk("%s", output); free_output_buffer(buffer); - printk("tuntap_open_tramp failed - errno = %d\n", err); - return(-err); + printk("tuntap_open_tramp failed - err = %d\n", -err); + return(err); } - close(fds[0]); + + pri->dev_name = uml_strdup(buffer); + output += IFNAMSIZ; + printk("%s", output); + free_output_buffer(buffer); + + os_close_file(fds[0]); iter_addresses(pri->dev, open_addr, pri->dev_name); } @@ -191,7 +192,7 @@ static void tuntap_close(int fd, void *d if(!pri->fixed_config) iter_addresses(pri->dev, close_addr, pri->dev_name); - close(fd); + os_close_file(fd); pri->fd = -1; } --- linux-2.6.3-rc2/arch/um/os-Linux/file.c 2003-06-14 12:18:51.000000000 -0700 +++ 25/arch/um/os-Linux/file.c 2004-02-12 00:40:26.000000000 -0800 @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -17,33 +19,235 @@ #include "user.h" #include "kern_util.h" -int os_file_type(char *file) +static void copy_stat(struct uml_stat *dst, struct stat64 *src) +{ + *dst = ((struct uml_stat) { + .ust_dev = src->st_dev, /* device */ + .ust_ino = src->st_ino, /* inode */ + .ust_mode = src->st_mode, /* protection */ + .ust_nlink = src->st_nlink, /* number of hard links */ + .ust_uid = src->st_uid, /* user ID of owner */ + .ust_gid = src->st_gid, /* group ID of owner */ + .ust_size = src->st_size, /* total size, in bytes */ + .ust_blksize = src->st_blksize, /* blocksize for filesys I/O */ + .ust_blocks = src->st_blocks, /* number of blocks allocated */ + .ust_atime = src->st_atime, /* time of last access */ + .ust_mtime = src->st_mtime, /* time of last modification */ + .ust_ctime = src->st_ctime, /* time of last change */ + }); +} + +int os_stat_fd(const int fd, struct uml_stat *ubuf) +{ + struct stat64 sbuf; + int err; + + do { + err = fstat64(fd, &sbuf); + } while((err < 0) && (errno == EINTR)) ; + + if(err < 0) + return(-errno); + + if(ubuf != NULL) + copy_stat(ubuf, &sbuf); + return(err); +} + +int os_stat_file(const char *file_name, struct uml_stat *ubuf) +{ + struct stat64 sbuf; + int err; + + do { + err = stat64(file_name, &sbuf); + } while((err < 0) && (errno == EINTR)) ; + + if(err < 0) + return(-errno); + + if(ubuf != NULL) + copy_stat(ubuf, &sbuf); + return(err); +} + +int os_access(const char* file, int mode) +{ + int amode, err; + + amode=(mode&OS_ACC_R_OK ? R_OK : 0) | (mode&OS_ACC_W_OK ? W_OK : 0) | + (mode&OS_ACC_X_OK ? X_OK : 0) | (mode&OS_ACC_F_OK ? F_OK : 0) ; + + err = access(file, amode); + if(err < 0) + return(-errno); + + return(0); +} + +void os_print_error(int error, const char* str) +{ + errno = error < 0 ? -error : error; + + perror(str); +} + +/* FIXME? required only by hostaudio (because it passes ioctls verbatim) */ +int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg) +{ + int err; + + err = ioctl(fd, cmd, arg); + if(err < 0) + return(-errno); + + return(err); +} + +int os_window_size(int fd, int *rows, int *cols) +{ + struct winsize size; + + if(ioctl(fd, TIOCGWINSZ, &size) < 0) + return(-errno); + + *rows = size.ws_row; + *cols = size.ws_col; + + return(0); +} + +int os_new_tty_pgrp(int fd, int pid) { - struct stat64 buf; + if(ioctl(fd, TIOCSCTTY, 0) < 0){ + printk("TIOCSCTTY failed, errno = %d\n", errno); + return(-errno); + } + + if(tcsetpgrp(fd, pid) < 0){ + printk("tcsetpgrp failed, errno = %d\n", errno); + return(-errno); + } + + return(0); +} + +/* FIXME: ensure namebuf in os_get_if_name is big enough */ +int os_get_ifname(int fd, char* namebuf) +{ + if(ioctl(fd, SIOCGIFNAME, namebuf) < 0) + return(-errno); + + return(0); +} + +int os_set_slip(int fd) +{ + int disc, sencap; + + disc = N_SLIP; + if(ioctl(fd, TIOCSETD, &disc) < 0){ + printk("Failed to set slip line discipline - " + "errno = %d\n", errno); + return(-errno); + } + + sencap = 0; + if(ioctl(fd, SIOCSIFENCAP, &sencap) < 0){ + printk("Failed to set slip encapsulation - " + "errno = %d\n", errno); + return(-errno); + } + + return(0); +} + +int os_set_owner(int fd, int pid) +{ + if(fcntl(fd, F_SETOWN, pid) < 0){ + int save_errno = errno; + + if(fcntl(fd, F_GETOWN, 0) != pid) + return(-save_errno); + } + + return(0); +} + +/* FIXME? moved wholesale from sigio_user.c to get fcntls out of that file */ +int os_sigio_async(int master, int slave) +{ + int flags; - if(stat64(file, &buf) == -1) + flags = fcntl(master, F_GETFL); + if(flags < 0) { + printk("fcntl F_GETFL failed, errno = %d\n", errno); return(-errno); + } + + if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || + (fcntl(master, F_SETOWN, os_getpid()) < 0)){ + printk("fcntl F_SETFL or F_SETOWN failed, errno = %d\n", errno); + return(-errno); + } + + if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)){ + printk("fcntl F_SETFL failed, errno = %d\n", errno); + return(-errno); + } - if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); - else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); - else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); - else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); - else if(S_ISFIFO(buf.st_mode)) return(OS_TYPE_FIFO); - else if(S_ISSOCK(buf.st_mode)) return(OS_TYPE_SOCK); + return(0); +} + +int os_mode_fd(int fd, int mode) +{ + int err; + + do { + err = fchmod(fd, mode); + } while((err < 0) && (errno==EINTR)) ; + + if(err < 0) + return(-errno); + + return(0); +} + +int os_file_type(char *file) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if(err < 0) + return(err); + + if(S_ISDIR(buf.ust_mode)) return(OS_TYPE_DIR); + else if(S_ISLNK(buf.ust_mode)) return(OS_TYPE_SYMLINK); + else if(S_ISCHR(buf.ust_mode)) return(OS_TYPE_CHARDEV); + else if(S_ISBLK(buf.ust_mode)) return(OS_TYPE_BLOCKDEV); + else if(S_ISFIFO(buf.ust_mode)) return(OS_TYPE_FIFO); + else if(S_ISSOCK(buf.ust_mode)) return(OS_TYPE_SOCK); else return(OS_TYPE_FILE); } int os_file_mode(char *file, struct openflags *mode_out) { + int err; + *mode_out = OPENFLAGS(); - if(!access(file, W_OK)) *mode_out = of_write(*mode_out); - else if(errno != EACCES) - return(-errno); + err = os_access(file, OS_ACC_W_OK); + if((err < 0) && (err != -EACCES)) + return(err); - if(!access(file, R_OK)) *mode_out = of_read(*mode_out); - else if(errno != EACCES) - return(-errno); + *mode_out = of_write(*mode_out); + + err = os_access(file, OS_ACC_R_OK); + if((err < 0) && (err != -EACCES)) + return(err); + + *mode_out = of_read(*mode_out); return(0); } @@ -63,16 +267,14 @@ int os_open_file(char *file, struct open if(flags.e) f |= O_EXCL; fd = open64(file, f, mode); - if(fd < 0) return(-errno); - - if(flags.cl){ - if(fcntl(fd, F_SETFD, 1)){ - close(fd); - return(-errno); - } + if(fd < 0) + return(-errno); + + if(flags.cl && fcntl(fd, F_SETFD, 1)){ + os_close_file(fd); + return(-errno); } - return(fd); return(fd); } @@ -90,7 +292,7 @@ int os_connect_socket(char *name) err = connect(fd, (struct sockaddr *) &sock, sizeof(sock)); if(err) - return(err); + return(-errno); return(fd); } @@ -109,88 +311,162 @@ int os_seek_file(int fd, __u64 offset) return(0); } -int os_read_file(int fd, void *buf, int len) +static int fault_buffer(void *start, int len, + int (*copy_proc)(void *addr, void *buf, int len)) { - int n; + int page = getpagesize(), i; + char c; - /* Force buf into memory if it's not already. */ + for(i = 0; i < len; i += page){ + if((*copy_proc)(start + i, &c, sizeof(c))) + return(-EFAULT); + } + if((len % page) != 0){ + if((*copy_proc)(start + len - 1, &c, sizeof(c))) + return(-EFAULT); + } + return(0); +} - /* XXX This fails if buf is kernel memory */ -#ifdef notdef - if(copy_to_user_proc(buf, &c, sizeof(c))) - return(-EFAULT); -#endif +static int file_io(int fd, void *buf, int len, + int (*io_proc)(int fd, void *buf, int len), + int (*copy_user_proc)(void *addr, void *buf, int len)) +{ + int n, err; + + do { + n = (*io_proc)(fd, buf, len); + if((n < 0) && (errno == EFAULT)){ + err = fault_buffer(buf, len, copy_user_proc); + if(err) + return(err); + n = (*io_proc)(fd, buf, len); + } + } while((n < 0) && (errno == EINTR)); - n = read(fd, buf, len); if(n < 0) return(-errno); return(n); } -int os_write_file(int fd, void *buf, int count) +int os_read_file(int fd, void *buf, int len) { - int n; - - /* Force buf into memory if it's not already. */ - - /* XXX This fails if buf is kernel memory */ -#ifdef notdef - if(copy_to_user_proc(buf, buf, buf[0])) - return(-EFAULT); -#endif + return(file_io(fd, buf, len, (int (*)(int, void *, int)) read, + copy_from_user_proc)); +} - n = write(fd, buf, count); - if(n < 0) - return(-errno); - return(n); +int os_write_file(int fd, const void *buf, int len) +{ + return(file_io(fd, (void *) buf, len, + (int (*)(int, void *, int)) write, copy_to_user_proc)); } int os_file_size(char *file, long long *size_out) { - struct stat64 buf; + struct uml_stat buf; + int err; - if(stat64(file, &buf) == -1){ - printk("Couldn't stat \"%s\" : errno = %d\n", file, errno); - return(-errno); + err = os_stat_file(file, &buf); + if(err < 0){ + printk("Couldn't stat \"%s\" : err = %d\n", file, -err); + return(err); } - if(S_ISBLK(buf.st_mode)){ + + if(S_ISBLK(buf.ust_mode)){ int fd, blocks; - if((fd = open64(file, O_RDONLY)) < 0){ - printk("Couldn't open \"%s\", errno = %d\n", file, - errno); - return(-errno); + fd = os_open_file(file, of_read(OPENFLAGS()), 0); + if(fd < 0){ + printk("Couldn't open \"%s\", errno = %d\n", file, -fd); + return(fd); } if(ioctl(fd, BLKGETSIZE, &blocks) < 0){ printk("Couldn't get the block size of \"%s\", " "errno = %d\n", file, errno); - close(fd); - return(-errno); + err = -errno; + os_close_file(fd); + return(err); } *size_out = ((long long) blocks) * 512; - close(fd); + os_close_file(fd); return(0); } - *size_out = buf.st_size; + *size_out = buf.ust_size; + return(0); +} + +int os_file_modtime(char *file, unsigned long *modtime) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if(err < 0){ + printk("Couldn't stat \"%s\" : err = %d\n", file, -err); + return(err); + } + + *modtime = buf.ust_mtime; return(0); } +int os_get_exec_close(int fd, int* close_on_exec) +{ + int ret; + + do { + ret = fcntl(fd, F_GETFD); + } while((ret < 0) && (errno == EINTR)) ; + + if(ret < 0) + return(-errno); + + *close_on_exec = (ret&FD_CLOEXEC) ? 1 : 0; + return(ret); +} + +int os_set_exec_close(int fd, int close_on_exec) +{ + int flag, err; + + if(close_on_exec) flag = FD_CLOEXEC; + else flag = 0; + + do { + err = fcntl(fd, F_SETFD, flag); + } while((err < 0) && (errno == EINTR)) ; + + if(err < 0) + return(-errno); + return(err); +} + int os_pipe(int *fds, int stream, int close_on_exec) { int err, type = stream ? SOCK_STREAM : SOCK_DGRAM; err = socketpair(AF_UNIX, type, 0, fds); - if(err) + if(err < 0) return(-errno); if(!close_on_exec) return(0); - if((fcntl(fds[0], F_SETFD, 1) < 0) || (fcntl(fds[1], F_SETFD, 1) < 0)) - printk("os_pipe : Setting FD_CLOEXEC failed, errno = %d", - errno); + err = os_set_exec_close(fds[0], 1); + if(err < 0) + goto error; + + err = os_set_exec_close(fds[1], 1); + if(err < 0) + goto error; return(0); + + error: + printk("os_pipe : Setting FD_CLOEXEC failed, err = %d\n", -err); + os_close_file(fds[1]); + os_close_file(fds[0]); + return(err); } int os_set_fd_async(int fd, int owner) @@ -270,7 +546,7 @@ int os_shutdown_socket(int fd, int r, in return(-EINVAL); } err = shutdown(fd, what); - if(err) + if(err < 0) return(-errno); return(0); } @@ -315,7 +591,7 @@ int os_rcv_fd(int fd, int *helper_pid_ou return(new); } -int create_unix_socket(char *file, int len) +int os_create_unix_socket(char *file, int len, int close_on_exec) { struct sockaddr_un addr; int sock, err; @@ -327,6 +603,13 @@ int create_unix_socket(char *file, int l return(-errno); } + if(close_on_exec) { + err = os_set_exec_close(sock, 1); + if(err < 0) + printk("create_unix_socket : close_on_exec failed, " + "err = %d", -err); + } + addr.sun_family = AF_UNIX; /* XXX Be more careful about overflow */ @@ -334,14 +617,45 @@ int create_unix_socket(char *file, int l err = bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (err < 0){ - printk("create_listening_socket - bind failed, errno = %d\n", - errno); + printk("create_listening_socket at '%s' - bind failed, " + "errno = %d\n", file, errno); return(-errno); } return(sock); } +void os_flush_stdout(void) +{ + fflush(stdout); +} + +int os_lock_file(int fd, int excl) +{ + int type = excl ? F_WRLCK : F_RDLCK; + struct flock lock = ((struct flock) { .l_type = type, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0 } ); + int err, save; + + err = fcntl(fd, F_SETLK, &lock); + if(!err) + goto out; + + save = -errno; + err = fcntl(fd, F_GETLK, &lock); + if(err){ + err = -errno; + goto out; + } + + printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid); + err = save; + out: + return(err); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- linux-2.6.3-rc2/arch/um/os-Linux/process.c 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/um/os-Linux/process.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -7,32 +7,37 @@ #include #include #include +#include #include #include #include "os.h" #include "user.h" +#define ARBITRARY_ADDR -1 +#define FAILURE_PID -1 + unsigned long os_process_pc(int pid) { char proc_stat[sizeof("/proc/#####/stat\0")], buf[256]; unsigned long pc; - int fd; + int fd, err; sprintf(proc_stat, "/proc/%d/stat", pid); fd = os_open_file(proc_stat, of_read(OPENFLAGS()), 0); if(fd < 0){ - printk("os_process_pc - couldn't open '%s', errno = %d\n", - proc_stat, errno); - return(-1); + printk("os_process_pc - couldn't open '%s', err = %d\n", + proc_stat, -fd); + return(ARBITRARY_ADDR); } - if(read(fd, buf, sizeof(buf)) < 0){ - printk("os_process_pc - couldn't read '%s', errno = %d\n", - proc_stat, errno); - close(fd); - return(-1); + err = os_read_file(fd, buf, sizeof(buf)); + if(err < 0){ + printk("os_process_pc - couldn't read '%s', err = %d\n", + proc_stat, -err); + os_close_file(fd); + return(ARBITRARY_ADDR); } - close(fd); - pc = -1; + os_close_file(fd); + pc = ARBITRARY_ADDR; if(sscanf(buf, "%*d %*s %*c %*d %*d %*d %*d %*d %*d %*d %*d " "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " "%*d %*d %*d %*d %ld", &pc) != 1){ @@ -52,22 +57,23 @@ int os_process_parent(int pid) snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); fd = os_open_file(stat, of_read(OPENFLAGS()), 0); if(fd < 0){ - printk("Couldn't open '%s', errno = %d\n", stat, -fd); - return(-1); + printk("Couldn't open '%s', err = %d\n", stat, -fd); + return(FAILURE_PID); } - n = read(fd, data, sizeof(data)); - close(fd); + n = os_read_file(fd, data, sizeof(data)); + os_close_file(fd); if(n < 0){ - printk("Couldn't read '%s', errno = %d\n", stat); - return(-1); + printk("Couldn't read '%s', err = %d\n", stat, -n); + return(FAILURE_PID); } - parent = -1; + parent = FAILURE_PID; /* XXX This will break if there is a space in the command */ n = sscanf(data, "%*d %*s %*c %d", &parent); - if(n != 1) printk("Failed to scan '%s'\n", data); + if(n != 1) + printk("Failed to scan '%s'\n", data); return(parent); } @@ -87,7 +93,8 @@ void os_kill_process(int pid, int reap_c void os_usr1_process(int pid) { - kill(pid, SIGUSR1); + syscall(__NR_tkill, pid, SIGUSR1); + /* kill(pid, SIGUSR1); */ } int os_getpid(void) @@ -95,7 +102,7 @@ int os_getpid(void) return(getpid()); } -int os_map_memory(void *virt, int fd, unsigned long off, unsigned long len, +int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x) { void *loc; @@ -104,8 +111,8 @@ int os_map_memory(void *virt, int fd, un prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | (x ? PROT_EXEC : 0); - loc = mmap((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, - fd, off); + loc = mmap64((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, + fd, off); if(loc == MAP_FAILED) return(-errno); return(0); @@ -126,7 +133,8 @@ int os_unmap_memory(void *addr, int len) int err; err = munmap(addr, len); - if(err < 0) return(-errno); + if(err < 0) + return(-errno); return(0); } --- linux-2.6.3-rc2/arch/um/os-Linux/tty.c 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/um/os-Linux/tty.c 2004-02-12 00:40:26.000000000 -0800 @@ -28,10 +28,10 @@ int get_pty(void) struct grantpt_info info; int fd; - if((fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0)) < 0){ - printk("get_pty : Couldn't open /dev/ptmx - errno = %d\n", - errno); - return(-1); + fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0); + if(fd < 0){ + printk("get_pty : Couldn't open /dev/ptmx - err = %d\n", -fd); + return(fd); } info.fd = fd; @@ -39,7 +39,7 @@ int get_pty(void) if(info.res < 0){ printk("get_pty : Couldn't grant pty - errno = %d\n", - info.err); + -info.err); return(-1); } if(unlockpt(fd) < 0){ --- linux-2.6.3-rc2/arch/um/sys-i386/bugs.c 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/um/sys-i386/bugs.c 2004-02-12 00:40:26.000000000 -0800 @@ -4,20 +4,21 @@ */ #include -#include #include #include #include +#include #include "kern_util.h" #include "user.h" #include "sysdep/ptrace.h" #include "task.h" +#include "os.h" #define MAXTOKEN 64 /* Set during early boot */ -int cpu_has_cmov = 1; -int cpu_has_xmm = 0; +int host_has_cmov = 1; +int host_has_xmm = 0; static char token(int fd, char *buf, int len, char stop) { @@ -27,13 +28,15 @@ static char token(int fd, char *buf, int ptr = buf; end = &buf[len]; do { - n = read(fd, ptr, sizeof(*ptr)); + n = os_read_file(fd, ptr, sizeof(*ptr)); c = *ptr++; - if(n == 0) return(0); - else if(n != sizeof(*ptr)){ - printk("Reading /proc/cpuinfo failed, " - "errno = %d\n", errno); - return(-errno); + if(n != sizeof(*ptr)){ + if(n == 0) return(0); + printk("Reading /proc/cpuinfo failed, err = %d\n", -n); + if(n < 0) + return(n); + else + return(-EIO); } } while((c != '\n') && (c != stop) && (ptr < end)); @@ -45,45 +48,79 @@ static char token(int fd, char *buf, int return(c); } -static int check_cpu_feature(char *feature, int *have_it) +static int find_cpuinfo_line(int fd, char *key, char *scratch, int len) { - char buf[MAXTOKEN], c; - int fd, len = sizeof(buf)/sizeof(buf[0]), n; - - printk("Checking for host processor %s support...", feature); - fd = open("/proc/cpuinfo", O_RDONLY); - if(fd < 0){ - printk("Couldn't open /proc/cpuinfo, errno = %d\n", errno); - return(0); - } + int n; + char c; - *have_it = 0; - buf[len - 1] = '\0'; + scratch[len - 1] = '\0'; while(1){ - c = token(fd, buf, len - 1, ':'); - if(c <= 0) goto out; + c = token(fd, scratch, len - 1, ':'); + if(c <= 0) + return(0); else if(c != ':'){ printk("Failed to find ':' in /proc/cpuinfo\n"); - goto out; + return(0); } - if(!strncmp(buf, "flags", strlen("flags"))) break; + if(!strncmp(scratch, key, strlen(key))) + return(1); do { - n = read(fd, &c, sizeof(c)); + n = os_read_file(fd, &c, sizeof(c)); if(n != sizeof(c)){ printk("Failed to find newline in " - "/proc/cpuinfo, n = %d, errno = %d\n", - n, errno); - goto out; + "/proc/cpuinfo, err = %d\n", -n); + return(0); } } while(c != '\n'); } + return(0); +} + +int cpu_feature(char *what, char *buf, int len) +{ + int fd, ret = 0; + + fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); + if(fd < 0){ + printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); + return(0); + } + + if(!find_cpuinfo_line(fd, what, buf, len)){ + printk("Couldn't find '%s' line in /proc/cpuinfo\n", what); + goto out_close; + } + + token(fd, buf, len, '\n'); + ret = 1; + + out_close: + os_close_file(fd); + return(ret); +} + +static int check_cpu_flag(char *feature, int *have_it) +{ + char buf[MAXTOKEN], c; + int fd, len = sizeof(buf)/sizeof(buf[0]); + + printk("Checking for host processor %s support...", feature); + fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); + if(fd < 0){ + printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); + return(0); + } + + *have_it = 0; + if(!find_cpuinfo_line(fd, "flags", buf, sizeof(buf) / sizeof(buf[0]))) + goto out; c = token(fd, buf, len - 1, ' '); if(c < 0) goto out; else if(c != ' '){ - printk("Failed to find ':' in /proc/cpuinfo\n"); + printk("Failed to find ' ' in /proc/cpuinfo\n"); goto out; } @@ -100,21 +137,48 @@ static int check_cpu_feature(char *featu out: if(*have_it == 0) printk("No\n"); else if(*have_it == 1) printk("Yes\n"); - close(fd); + os_close_file(fd); return(1); } +#if 0 /* This doesn't work in tt mode, plus it's causing compilation problems + * for some people. + */ +static void disable_lcall(void) +{ + struct modify_ldt_ldt_s ldt; + int err; + + bzero(&ldt, sizeof(ldt)); + ldt.entry_number = 7; + ldt.base_addr = 0; + ldt.limit = 0; + err = modify_ldt(1, &ldt, sizeof(ldt)); + if(err) + printk("Failed to disable lcall7 - errno = %d\n", errno); +} +#endif + +void arch_init_thread(void) +{ +#if 0 + disable_lcall(); +#endif +} + void arch_check_bugs(void) { int have_it; - if(access("/proc/cpuinfo", R_OK)){ + if(os_access("/proc/cpuinfo", OS_ACC_R_OK) < 0){ printk("/proc/cpuinfo not available - skipping CPU capability " "checks\n"); return; } - if(check_cpu_feature("cmov", &have_it)) cpu_has_cmov = have_it; - if(check_cpu_feature("xmm", &have_it)) cpu_has_xmm = have_it; + if(check_cpu_flag("cmov", &have_it)) + host_has_cmov = have_it; + if(check_cpu_flag("xmm", &have_it)) + host_has_xmm = have_it; } int arch_handle_signal(int sig, union uml_pt_regs *regs) @@ -130,18 +194,18 @@ int arch_handle_signal(int sig, union um if((*((char *) ip) != 0x0f) || ((*((char *) (ip + 1)) & 0xf0) != 0x40)) return(0); - if(cpu_has_cmov == 0) + if(host_has_cmov == 0) panic("SIGILL caused by cmov, which this processor doesn't " "implement, boot a filesystem compiled for older " "processors"); - else if(cpu_has_cmov == 1) + else if(host_has_cmov == 1) panic("SIGILL caused by cmov, which this processor claims to " "implement"); - else if(cpu_has_cmov == -1) + else if(host_has_cmov == -1) panic("SIGILL caused by cmov, couldn't tell if this processor " "implements it, boot a filesystem compiled for older " "processors"); - else panic("Bad value for cpu_has_cmov (%d)", cpu_has_cmov); + else panic("Bad value for host_has_cmov (%d)", host_has_cmov); return(0); } --- linux-2.6.3-rc2/arch/um/sys-i386/Makefile 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/sys-i386/Makefile 2004-02-12 00:40:26.000000000 -0800 @@ -1,7 +1,8 @@ -obj-y = bugs.o checksum.o extable.o fault.o ksyms.o ldt.o module.o \ - ptrace.o ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o +obj-y = bugs.o checksum.o fault.o ksyms.o ldt.o ptrace.o \ + ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o time.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_MODULES) += module.o USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) @@ -9,6 +10,8 @@ USER_OBJS := $(foreach file,$(USER_OBJS) SYMLINKS = semaphore.c highmem.c module.c SYMLINKS := $(foreach f,$(SYMLINKS),$(src)/$f) +clean-files := $(SYMLINKS) + semaphore.c-dir = kernel highmem.c-dir = mm module.c-dir = kernel @@ -24,8 +27,7 @@ $(USER_OBJS) : %.o: %.c $(SYMLINKS): $(call make_link,$@) -clean: - $(MAKE) -C util clean +subdir- := util fastdep: --- linux-2.6.3-rc2/arch/um/sys-i386/ptrace_user.c 2003-06-14 12:18:04.000000000 -0700 +++ 25/arch/um/sys-i386/ptrace_user.c 2004-02-12 00:40:26.000000000 -0800 @@ -39,10 +39,10 @@ static void write_debugregs(int pid, uns nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); for(i = 0; i < nregs; i++){ if((i == 4) || (i == 5)) continue; - if(ptrace(PTRACE_POKEUSR, pid, &dummy->u_debugreg[i], + if(ptrace(PTRACE_POKEUSER, pid, &dummy->u_debugreg[i], regs[i]) < 0) - printk("write_debugregs - ptrace failed, " - "errno = %d\n", errno); + printk("write_debugregs - ptrace failed on " + "register %d, errno = %d\n", errno); } } @@ -54,7 +54,7 @@ static void read_debugregs(int pid, unsi dummy = NULL; nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); for(i = 0; i < nregs; i++){ - regs[i] = ptrace(PTRACE_PEEKUSR, pid, + regs[i] = ptrace(PTRACE_PEEKUSER, pid, &dummy->u_debugreg[i], 0); } } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/sys-i386/time.c 2004-02-12 00:40:26.000000000 -0800 @@ -0,0 +1,24 @@ +/* + * sys-i386/time.c + * Created 25.9.2002 Sapan Bhatia + * + */ + +unsigned long long time_stamp(void) +{ + unsigned long low, high; + + asm("rdtsc" : "=a" (low), "=d" (high)); + return((((unsigned long long) high) << 32) + low); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.3-rc2/arch/um/sys-i386/util/Makefile 2003-06-14 12:18:35.000000000 -0700 +++ 25/arch/um/sys-i386/util/Makefile 2004-02-12 00:40:26.000000000 -0800 @@ -6,10 +6,10 @@ targets := mk_thread_kern.o mk_thread_u mk_sc-objs := mk_sc.o $(obj)/mk_thread : $(obj)/mk_thread_kern.o $(obj)/mk_thread_user.o - $(CC) $(CFLAGS) -o $@ $^ + $(HOSTCC) $(CFLAGS) -o $@ $^ $(obj)/mk_thread_user.o : $(src)/mk_thread_user.c - $(CC) $(USER_CFLAGS) -c -o $@ $< + $(HOSTCC) $(USER_CFLAGS) -c -o $@ $< clean : $(RM) -f $(build-targets) --- linux-2.6.3-rc2/arch/um/sys-i386/util/mk_sc.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/um/sys-i386/util/mk_sc.c 2004-02-12 00:40:26.000000000 -0800 @@ -38,6 +38,7 @@ int main(int argc, char **argv) SC_OFFSET("SC_ERR", err); SC_OFFSET("SC_CR2", cr2); SC_OFFSET("SC_FPSTATE", fpstate); + SC_OFFSET("SC_SIGMASK", oldmask); SC_FP_OFFSET("SC_FP_CW", cw); SC_FP_OFFSET("SC_FP_SW", sw); SC_FP_OFFSET("SC_FP_TAG", tag); --- linux-2.6.3-rc2/arch/um/uml.lds.S 2003-06-14 12:18:09.000000000 -0700 +++ 25/arch/um/uml.lds.S 2004-02-12 00:40:26.000000000 -0800 @@ -9,7 +9,6 @@ SECTIONS { . = START + SIZEOF_HEADERS; - . = ALIGN(4096); __binary_start = .; #ifdef MODE_TT .thread_private : { @@ -26,7 +25,11 @@ SECTIONS . = ALIGN(4096); /* Init code and data */ _stext = .; __init_begin = .; - .text.init : { *(.text.init) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } . = ALIGN(4096); .text : { @@ -38,7 +41,7 @@ SECTIONS #include "asm/common.lds.S" - .data.init : { *(.data.init) } + init.data : { *(init.data) } .data : { . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ --- linux-2.6.3-rc2/arch/um/util/Makefile 2003-06-14 12:18:51.000000000 -0700 +++ 25/arch/um/util/Makefile 2004-02-12 00:40:26.000000000 -0800 @@ -3,19 +3,19 @@ targets := mk_task_user.o mk_task_kern. mk_constants_user.o mk_constants_kern.o $(obj)/mk_task: $(obj)/mk_task_user.o $(obj)/mk_task_kern.o - $(CC) -o $@ $^ + $(HOSTCC) -o $@ $^ $(obj)/mk_task_user.o: $(src)/mk_task_user.c - $(CC) -o $@ -c $< + $(HOSTCC) -o $@ -c $< $(obj)/mk_constants : $(obj)/mk_constants_user.o $(obj)/mk_constants_kern.o - $(CC) -o $@ $^ + $(HOSTCC) -o $@ $^ $(obj)/mk_constants_user.o : $(src)/mk_constants_user.c - $(CC) -c $< -o $@ + $(HOSTCC) -c $< -o $@ $(obj)/mk_constants_kern.o : $(src)/mk_constants_kern.c - $(CC) $(CFLAGS) -c $< -o $@ + $(HOSTCC) $(CFLAGS) -c $< -o $@ clean: $(RM) $(build-targets) --- linux-2.6.3-rc2/arch/um/util/mk_constants_kern.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/util/mk_constants_kern.c 2004-02-12 00:40:26.000000000 -0800 @@ -1,5 +1,6 @@ #include "linux/kernel.h" #include "linux/stringify.h" +#include "linux/time.h" #include "asm/page.h" extern void print_head(void); @@ -11,6 +12,7 @@ int main(int argc, char **argv) { print_head(); print_constant_int("UM_KERN_PAGE_SIZE", PAGE_SIZE); + print_constant_str("UM_KERN_EMERG", KERN_EMERG); print_constant_str("UM_KERN_ALERT", KERN_ALERT); print_constant_str("UM_KERN_CRIT", KERN_CRIT); @@ -19,6 +21,8 @@ int main(int argc, char **argv) print_constant_str("UM_KERN_NOTICE", KERN_NOTICE); print_constant_str("UM_KERN_INFO", KERN_INFO); print_constant_str("UM_KERN_DEBUG", KERN_DEBUG); + + print_constant_int("UM_NSEC_PER_SEC", NSEC_PER_SEC); print_tail(); return(0); } --- linux-2.6.3-rc2/arch/x86_64/ia32/ia32_ioctl.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/ia32/ia32_ioctl.c 2004-02-12 00:40:52.000000000 -0800 @@ -10,12 +10,11 @@ */ #define INCLUDES +#include #include "compat_ioctl.c" #include #include -extern asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); - #define CODE #include "compat_ioctl.c" --- linux-2.6.3-rc2/arch/x86_64/ia32/ptrace32.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/x86_64/ia32/ptrace32.c 2004-02-12 00:40:57.000000000 -0800 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -223,8 +224,6 @@ static struct task_struct *find_target(i } -extern asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, unsigned long data); - asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) { struct task_struct *child; --- linux-2.6.3-rc2/arch/x86_64/ia32/sys_ia32.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/x86_64/ia32/sys_ia32.c 2004-02-12 00:40:55.000000000 -0800 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -110,9 +111,6 @@ int cp_compat_stat(struct kstat *kbuf, s return 0; } -extern long sys_truncate(char *, loff_t); -extern long sys_ftruncate(int, loff_t); - asmlinkage long sys32_truncate64(char * filename, unsigned long offset_low, unsigned long offset_high) { @@ -236,8 +234,6 @@ sys32_mmap(struct mmap_arg_struct *arg) return retval; } -extern asmlinkage long sys_mprotect(unsigned long start,size_t len,unsigned long prot); - asmlinkage long sys32_mprotect(unsigned long start, size_t len, unsigned long prot) { @@ -363,12 +359,9 @@ sys32_sigaction (int sig, struct old_sig return ret; } -extern asmlinkage long sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, - size_t sigsetsize); - asmlinkage long -sys32_rt_sigprocmask(int how, compat_sigset_t *set, compat_sigset_t *oset, - unsigned int sigsetsize) +sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, unsigned int sigsetsize) { sigset_t s; compat_sigset_t s32; @@ -734,9 +727,6 @@ sys32_old_select(struct sel_arg_struct * (struct compat_timeval *)A(a.tvp)); } -asmlinkage ssize_t sys_readv(unsigned long,const struct iovec *,unsigned long); -asmlinkage ssize_t sys_writev(unsigned long,const struct iovec *,unsigned long); - static struct iovec * get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 *count, int type, int *errp) { @@ -878,18 +868,12 @@ int sys32_ni_syscall(int call) /* 32-bit timeval and related flotsam. */ -extern asmlinkage long sys_sysfs(int option, unsigned long arg1, - unsigned long arg2); - asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2) { return sys_sysfs(option, arg1, arg2); } -extern asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, - unsigned long new_flags, void *data); - static char *badfs[] = { "smbfs", "ncpfs", NULL }; @@ -940,8 +924,6 @@ struct sysinfo32 { char _f[20-2*sizeof(u32)-sizeof(int)]; }; -extern asmlinkage long sys_sysinfo(struct sysinfo *info); - asmlinkage long sys32_sysinfo(struct sysinfo32 *info) { @@ -991,9 +973,6 @@ sys32_sysinfo(struct sysinfo32 *info) return 0; } -extern asmlinkage long sys_sched_rr_get_interval(pid_t pid, - struct timespec *interval); - asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec *interval) { @@ -1009,10 +988,8 @@ sys32_sched_rr_get_interval(compat_pid_t return ret; } -extern asmlinkage long sys_rt_sigpending(sigset_t *set, size_t sigsetsize); - asmlinkage long -sys32_rt_sigpending(compat_sigset_t *set, compat_size_t sigsetsize) +sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize) { sigset_t s; compat_sigset_t s32; @@ -1035,9 +1012,6 @@ sys32_rt_sigpending(compat_sigset_t *set return ret; } -extern asmlinkage long -sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, - const struct timespec *uts, size_t sigsetsize); asmlinkage long sys32_rt_sigtimedwait(compat_sigset_t *uthese, siginfo_t32 *uinfo, @@ -1077,9 +1051,6 @@ sys32_rt_sigtimedwait(compat_sigset_t *u return ret; } -extern asmlinkage long -sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo); - asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo) { @@ -1165,12 +1136,6 @@ sys32_sysctl(struct sysctl_ia32 *args32) #endif } -extern asmlinkage ssize_t sys_pread64(unsigned int fd, char * buf, - size_t count, loff_t pos); - -extern asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char * buf, - size_t count, loff_t pos); - /* warning: next two assume little endian */ asmlinkage long sys32_pread(unsigned int fd, char *ubuf, u32 count, u32 poslo, u32 poshi) @@ -1187,8 +1152,6 @@ sys32_pwrite(unsigned int fd, char *ubuf } -extern asmlinkage long sys_personality(unsigned long); - asmlinkage long sys32_personality(unsigned long personality) { @@ -1202,9 +1165,6 @@ sys32_personality(unsigned long personal return ret; } -extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, - size_t count); - asmlinkage long sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, s32 count) { @@ -1375,9 +1335,7 @@ long sys32_uname(struct old_utsname * na return err?-EFAULT:0; } -extern int sys_ustat(dev_t, struct ustat *); - -long sys32_ustat(unsigned dev, struct ustat32 *u32p) +long sys32_ustat(unsigned dev, struct ustat32 __user *u32p) { struct ustat u; mm_segment_t seg; @@ -1500,15 +1458,11 @@ asmlinkage long sys32_clone(unsigned int * Some system calls that need sign extended arguments. This could be done by a generic wrapper. */ -extern off_t sys_lseek (unsigned int fd, off_t offset, unsigned int origin); - long sys32_lseek (unsigned int fd, int offset, unsigned int whence) { return sys_lseek(fd, offset, whence); } -extern int sys_kill(pid_t pid, int sig); - long sys32_kill(int pid, int sig) { return sys_kill(pid, sig); @@ -1736,15 +1690,12 @@ done: return err; } #else /* !NFSD */ -extern asmlinkage long sys_ni_syscall(void); long asmlinkage sys32_nfsservctl(int cmd, void *notused, void *notused2) { return sys_ni_syscall(); } #endif -extern long sys_io_setup(unsigned nr_reqs, aio_context_t *ctx); - long sys32_io_setup(unsigned nr_reqs, u32 *ctx32p) { long ret; @@ -1802,11 +1753,6 @@ asmlinkage long sys32_io_submit(aio_cont return i ? i : ret; } -extern asmlinkage long sys_io_getevents(aio_context_t ctx_id, - long min_nr, - long nr, - struct io_event *events, - struct timespec *timeout); asmlinkage long sys32_io_getevents(aio_context_t ctx_id, unsigned long min_nr, --- linux-2.6.3-rc2/arch/x86_64/Kconfig 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/x86_64/Kconfig 2004-02-12 00:41:12.000000000 -0800 @@ -48,12 +48,14 @@ config EARLY_PRINTK bool default y help - Write kernel log output directly into the VGA buffer. This is useful - for kernel debugging when your machine crashes very early before - the console code is initialized. For normal operation it is not - recommended because it looks ugly and doesn't cooperate with - klogd/syslogd or the X server. You should normally N here, unless - you want to debug such a crash. + Write kernel log output directly into the VGA buffer or to a serial + port. + + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. For normal operation + it is not recommended because it looks ugly and doesn't cooperate + with klogd/syslogd or the X server. You should normally N here, + unless you want to debug such a crash. config HPET_TIMER bool @@ -111,10 +113,6 @@ config X86_TSC bool default y -config X86_GOOD_APIC - bool - default y - config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" help @@ -433,6 +431,7 @@ config INIT_DEBUG config DEBUG_INFO bool "Compile the kernel with debug info" depends on DEBUG_KERNEL + default n help If you say Y here the resulting kernel image will include debugging info resulting in a larger kernel image. @@ -464,9 +463,8 @@ config IOMMU_LEAK help Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. - -#config X86_REMOTE_DEBUG -# bool "kgdb debugging stub" + +source "arch/x86_64/Kconfig.kgdb" endmenu --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/x86_64/Kconfig.kgdb 2004-02-12 00:39:48.000000000 -0800 @@ -0,0 +1,176 @@ +config KGDB + bool "Include kgdb kernel debugger" + depends on DEBUG_KERNEL + select DEBUG_INFO + help + If you say Y here, the system will be compiled with the debug + option (-g) and a debugging stub will be included in the + kernel. This stub communicates with gdb on another (host) + computer via a serial port. The host computer should have + access to the kernel binary file (vmlinux) and a serial port + that is connected to the target machine. Gdb can be made to + configure the serial port or you can use stty and setserial to + do this. See the 'target' command in gdb. This option also + configures in the ability to request a breakpoint early in the + boot process. To request the breakpoint just include 'kgdb' + as a boot option when booting the target machine. The system + will then break as soon as it looks at the boot options. This + option also installs a breakpoint in panic and sends any + kernel faults to the debugger. For more information see the + Documentation/i386/kgdb.txt file. + +choice + depends on KGDB + prompt "Debug serial port BAUD" + default KGDB_115200BAUD + help + Gdb and the kernel stub need to agree on the baud rate to be + used. Some systems (x86 family at this writing) allow this to + be configured. + +config KGDB_9600BAUD + bool "9600" + +config KGDB_19200BAUD + bool "19200" + +config KGDB_38400BAUD + bool "38400" + +config KGDB_57600BAUD + bool "57600" + +config KGDB_115200BAUD + bool "115200" +endchoice + +config KGDB_PORT + hex "hex I/O port address of the debug serial port" + depends on KGDB + default 3f8 + help + Some systems (x86 family at this writing) allow the port + address to be configured. The number entered is assumed to be + hex, don't put 0x in front of it. The standard address are: + COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx + will tell you what you have. It is good to test the serial + connection with a live system before trying to debug. + +config KGDB_IRQ + int "IRQ of the debug serial port" + depends on KGDB + default 4 + help + This is the irq for the debug port. If everything is working + correctly and the kernel has interrupts on a control C to the + port should cause a break into the kernel debug stub. + +config DEBUG_INFO + bool + depends on KGDB + default y + +config KGDB_MORE + bool "Add any additional compile options" + depends on KGDB + default n + help + Saying yes here turns on the ability to enter additional + compile options. + + +config KGDB_OPTIONS + depends on KGDB_MORE + string "Additional compile arguments" + default "-O1" + help + This option allows you enter additional compile options for + the whole kernel compile. Each platform will have a default + that seems right for it. For example on PPC "-ggdb -O1", and + for i386 "-O1". Note that by configuring KGDB "-g" is already + turned on. In addition, on i386 platforms + "-fomit-frame-pointer" is deleted from the standard compile + options. + +config NO_KGDB_CPUS + int "Number of CPUs" + depends on KGDB && SMP + default NR_CPUS + help + + This option sets the number of cpus for kgdb ONLY. It is used + to prune some internal structures so they look "nice" when + displayed with gdb. This is to overcome possibly larger + numbers that may have been entered above. Enter the real + number to get nice clean kgdb_info displays. + +config KGDB_TS + bool "Enable kgdb time stamp macros?" + depends on KGDB + default n + help + Kgdb event macros allow you to instrument your code with calls + to the kgdb event recording function. The event log may be + examined with gdb at a break point. Turning on this + capability also allows you to choose how many events to + keep. Kgdb always keeps the lastest events. + +choice + depends on KGDB_TS + prompt "Max number of time stamps to save?" + default KGDB_TS_128 + +config KGDB_TS_64 + bool "64" + +config KGDB_TS_128 + bool "128" + +config KGDB_TS_256 + bool "256" + +config KGDB_TS_512 + bool "512" + +config KGDB_TS_1024 + bool "1024" + +endchoice + +config STACK_OVERFLOW_TEST + bool "Turn on kernel stack overflow testing?" + depends on KGDB + default n + help + This option enables code in the front line interrupt handlers + to check for kernel stack overflow on interrupts and system + calls. This is part of the kgdb code on x86 systems. + +config KGDB_CONSOLE + bool "Enable serial console thru kgdb port" + depends on KGDB + default n + help + This option enables the command line "console=kgdb" option. + When the system is booted with this option in the command line + all kernel printk output is sent to gdb (as well as to other + consoles). For this to work gdb must be connected. For this + reason, this command line option will generate a breakpoint if + gdb has not yet connected. After the gdb continue command is + given all pent up console output will be printed by gdb on the + host machine. Neither this option, nor KGDB require the + serial driver to be configured. + +config KGDB_SYSRQ + bool "Turn on SysRq 'G' command to do a break?" + depends on KGDB + default y + help + This option includes an option in the SysRq code that allows + you to enter SysRq G which generates a breakpoint to the KGDB + stub. This will work if the keyboard is alive and can + interrupt the system. Because of constraints on when the + serial port interrupt can be enabled, this code may allow you + to interrupt the system before the serial port control C is + available. Just say yes here. + --- linux-2.6.3-rc2/arch/x86_64/kernel/acpi/boot.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/x86_64/kernel/acpi/boot.c 2004-02-12 00:39:33.000000000 -0800 @@ -120,6 +120,11 @@ acpi_parse_lapic ( acpi_table_print_madt_entry(header); + /* no utility in registering a disabled processor */ + if (processor->flags.enabled == 0) + return 0; + + mp_register_lapic ( processor->id, /* APIC ID */ processor->flags.enabled); /* Enabled? */ --- linux-2.6.3-rc2/arch/x86_64/kernel/early_printk.c 2003-06-14 12:18:26.000000000 -0700 +++ 25/arch/x86_64/kernel/early_printk.c 2004-02-12 00:41:12.000000000 -0800 @@ -7,7 +7,11 @@ /* Simple VGA output */ +#ifdef __i386__ +#define VGABASE __pa(__PAGE_OFFSET + 0xb8000UL) +#else #define VGABASE 0xffffffff800b8000UL +#endif #define MAX_YPOS 25 #define MAX_XPOS 80 @@ -22,15 +26,14 @@ static void early_vga_write(struct conso while ((c = *str++) != '\0' && n-- > 0) { if (current_ypos >= MAX_YPOS) { /* scroll 1 line up */ - for(k = 1, j = 0; k < MAX_YPOS; k++, j++) { - for(i = 0; i < MAX_XPOS; i++) { + for (k = 1, j = 0; k < MAX_YPOS; k++, j++) { + for (i = 0; i < MAX_XPOS; i++) { writew(readw(VGABASE + 2*(MAX_XPOS*k + i)), VGABASE + 2*(MAX_XPOS*j + i)); } } - for(i = 0; i < MAX_XPOS; i++) { + for (i = 0; i < MAX_XPOS; i++) writew(0x720, VGABASE + 2*(MAX_XPOS*j + i)); - } current_ypos = MAX_YPOS-1; } if (c == '\n') { @@ -38,7 +41,8 @@ static void early_vga_write(struct conso current_ypos++; } else if (c != '\r') { writew(((0x7 << 8) | (unsigned short) c), - VGABASE + 2*(MAX_XPOS*current_ypos + current_xpos++)); + VGABASE + 2*(MAX_XPOS*current_ypos + + current_xpos++)); if (current_xpos >= MAX_XPOS) { current_xpos = 0; current_ypos++; @@ -78,7 +82,7 @@ static int early_serial_putc(unsigned ch { unsigned timeout = 0xffff; while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) - rep_nop(); + cpu_relax(); outb(ch, early_serial_base + TXR); return timeout ? 0 : -1; } @@ -93,10 +97,13 @@ static void early_serial_write(struct co } } +#define DEFAULT_BAUD 9600 + static __init void early_serial_init(char *opt) { unsigned char c; - unsigned divisor, baud = 38400; + unsigned divisor; + unsigned baud = DEFAULT_BAUD; char *s, *e; if (*opt == ',') @@ -109,25 +116,26 @@ static __init void early_serial_init(cha early_serial_base = simple_strtoul(s, &e, 16); } else { static int bases[] = { 0x3f8, 0x2f8 }; - if (!strncmp(s,"ttyS",4)) - s+=4; - port = simple_strtoul(s, &e, 10); - if (port > 1 || s == e) - port = 0; - early_serial_base = bases[port]; - } + + if (!strncmp(s,"ttyS",4)) + s += 4; + port = simple_strtoul(s, &e, 10); + if (port > 1 || s == e) + port = 0; + early_serial_base = bases[port]; + } } - outb(0x3, early_serial_base + LCR); /* 8n1 */ - outb(0, early_serial_base + IER); /* no interrupt */ - outb(0, early_serial_base + FCR); /* no fifo */ - outb(0x3, early_serial_base + MCR); /* DTR + RTS */ + outb(0x3, early_serial_base + LCR); /* 8n1 */ + outb(0, early_serial_base + IER); /* no interrupt */ + outb(0, early_serial_base + FCR); /* no fifo */ + outb(0x3, early_serial_base + MCR); /* DTR + RTS */ s = strsep(&opt, ","); if (s != NULL) { baud = simple_strtoul(s, &e, 0); if (baud == 0 || s == e) - baud = 38400; + baud = DEFAULT_BAUD; } divisor = 115200 / baud; @@ -154,8 +162,9 @@ void early_printk(const char *fmt, ...) char buf[512]; int n; va_list ap; + va_start(ap,fmt); - n = vsnprintf(buf,512,fmt,ap); + n = vscnprintf(buf,512,fmt,ap); early_console->write(early_console,buf,n); va_end(ap); } @@ -170,6 +179,8 @@ int __init setup_early_printk(char *opt) if (early_console_initialized) return -1; + opt = strchr(opt, '=') + 1; + strlcpy(buf,opt,sizeof(buf)); space = strchr(buf, ' '); if (space) @@ -200,19 +211,12 @@ void __init disable_early_printk(void) if (!early_console_initialized || !early_console) return; if (!keep_early) { - printk("disabling early console...\n"); + printk("disabling early console\n"); unregister_console(early_console); early_console_initialized = 0; } else { - printk("keeping early console.\n"); + printk("keeping early console\n"); } } -/* syntax: earlyprintk=vga - earlyprintk=serial[,ttySn[,baudrate]] - Append ,keep to not disable it when the real console takes over. - Only vga or serial at a time, not both. - Currently only ttyS0 and ttyS1 are supported. - Interaction with the standard serial driver is not very good. - The VGA output is eventually overwritten by the real console. */ -__setup("earlyprintk=", setup_early_printk); +__setup("earlyprintk=", setup_early_printk); --- linux-2.6.3-rc2/arch/x86_64/kernel/head64.c 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/x86_64/kernel/head64.c 2004-02-12 00:41:12.000000000 -0800 @@ -83,9 +83,9 @@ void __init x86_64_start_kernel(char * r /* default console: */ if (!strstr(saved_command_line, "console=")) strcat(saved_command_line, " console=tty0"); - s = strstr(saved_command_line, "earlyprintk="); + s = strstr(saved_command_line, "earlyprintk="); if (s != NULL) - setup_early_printk(s+12); + setup_early_printk(s); #ifdef CONFIG_DISCONTIGMEM s = strstr(saved_command_line, "numa="); if (s != NULL) --- linux-2.6.3-rc2/arch/x86_64/kernel/irq.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/irq.c 2004-02-12 00:41:09.000000000 -0800 @@ -405,6 +405,9 @@ out: spin_unlock(&desc->lock); irq_exit(); + + kgdb_process_breakpoint(); + return 1; } @@ -826,7 +829,7 @@ static cpumask_t irq_affinity [NR_IRQS] static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); + int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -864,7 +867,7 @@ static int irq_affinity_write_proc (stru static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/x86_64/kernel/kgdb_stub.c 2004-02-12 00:39:48.000000000 -0800 @@ -0,0 +1,2595 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (c) 2000 VERITAS Software Corporation. + * + */ +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * Updated by: David Grothe + * Updated by: Robert Walsh + * Updated by: wangdi + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Compatibility with 2.1.xx kernel by David Grothe + * + * Changes to allow auto initilization. All that is needed is that it + * be linked with the kernel and a break point (int 3) be executed. + * The header file defines BREAKPOINT to allow one to do + * this. It should also be possible, once the interrupt system is up, to + * call putDebugChar("+"). Once this is done, the remote debugger should + * get our attention by sending a ^C in a packet. George Anzinger + * + * Integrated into 2.2.5 kernel by Tigran Aivazian + * Added thread support, support for multiple processors, + * support for ia-32(x86) hardware debugging. + * Amit S. Kale ( akale@veritas.com ) + * + * Modified to support debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + * X86_64 changes from Andi Kleen's patch merged by Jim Houston + * (jim.houston@ccur.com). If it works thank Andi if its broken + * blame me. + * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing an int 3. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? SNN (signal NN) + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $#. + * + * where + * :: + * :: < two hex digits computed as modulo 256 sum of > + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer. + * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ +#define KGDB_VERSION "<20030915.1651.33>" +#include +#include +#include /* for strcpy */ +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define Dearly_printk(x...) +int kgdb_enabled = 0; + +/************************************************************************ + * + * external low-level support routines + */ +typedef void (*Function) (void); /* pointer to a function */ + +/* Thread reference */ +typedef unsigned char threadref[8]; + +extern int tty_putDebugChar(int); /* write a single character */ +extern int tty_getDebugChar(void); /* read and return a single char */ +extern void tty_flushDebugChar(void); /* flush pending characters */ +extern int eth_putDebugChar(int); /* write a single character */ +extern int eth_getDebugChar(void); /* read and return a single char */ +extern void eth_flushDebugChar(void); /* flush pending characters */ + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 400 + +char *kgdb_version = KGDB_VERSION; + +/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ +int debug_regs = 0; /* set to non-zero to print registers */ + +/* filled in by an external module */ +char *gdb_module_offsets; + +static const char hexchars[] = "0123456789abcdef"; + +/* Number of bytes of registers. */ +#define NUMREGBYTES (NUMREGS * sizeof(unsigned long)) +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + * + * Could add XMM and segment registers here. + */ +enum regnames {_RAX, + _RBX, + _RCX, + _RDX, + _RSI, + _RDI, + _RBP, + _RSP, + _R8, + _R9, + _R10, + _R11, + _R12, + _R13, + _R14, + _R15, + _PC, + _PS, + NUMREGS }; + + +/*************************** ASSEMBLY CODE MACROS *************************/ +/* + * Put the error code here just in case the user cares. + * Likewise, the vector number here (since GDB only gets the signal + * number through the usual means, and that's not very specific). + * The called_from is the return address so he can tell how we entered kgdb. + * This will allow him to seperate out the various possible entries. + */ +#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ + +#define PID_MAX PID_MAX_DEFAULT + +#ifdef CONFIG_SMP +void smp_send_nmi_allbutself(void); +#define IF_SMP(x) x +#undef MAX_NO_CPUS +#ifndef CONFIG_NO_KGDB_CPUS +#define CONFIG_NO_KGDB_CPUS 2 +#endif +#if CONFIG_NO_KGDB_CPUS > NR_CPUS +#define MAX_NO_CPUS NR_CPUS +#else +#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS +#endif +#define hold_init hold_on_sstep: 1, +#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) +#define NUM_CPUS num_online_cpus() +#else +#define IF_SMP(x) +#define hold_init +#undef MAX_NO_CPUS +#define MAX_NO_CPUS 1 +#define NUM_CPUS 1 +#endif +#define NOCPU (struct task_struct *)0xbad1fbad +/* *INDENT-OFF* */ +struct kgdb_info { + int used_malloc; + void *called_from; + long long entry_tsc; + int errcode; + int vector; + int print_debug_info; +#ifdef CONFIG_SMP + int hold_on_sstep; + struct { + volatile struct task_struct *task; + int pid; + int hold; + struct pt_regs *regs; + } cpus_waiting[MAX_NO_CPUS]; +#endif +} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; + +/* *INDENT-ON* */ + +#define used_m kgdb_info.used_malloc +/* + * This is little area we set aside to contain the stack we + * need to build to allow gdb to call functions. We use one + * per cpu to avoid locking issues. We will do all this work + * with interrupts off so that should take care of the protection + * issues. + */ +#define LOOKASIDE_SIZE 200 /* should be more than enough */ +#define MALLOC_MAX 200 /* Max malloc size */ +struct { + unsigned long rsp; + unsigned long array[LOOKASIDE_SIZE]; +} fn_call_lookaside[MAX_NO_CPUS]; + +static int trap_cpu; +static unsigned long OLD_esp; + +#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] +#define IF_BIT 0x200 +#define TF_BIT 0x100 + +#define MALLOC_ROUND 8-1 + +static char malloc_array[MALLOC_MAX]; +IF_SMP(static void to_gdb(const char *mess)); +void * +malloc(int size) +{ + + if (size <= (MALLOC_MAX - used_m)) { + int old_used = used_m; + used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); + return &malloc_array[old_used]; + } else { + return NULL; + } +} + +/* + * I/O dispatch functions... + * Based upon kgdboe, either call the ethernet + * handler or the serial one.. + */ +void +putDebugChar(int c) +{ + if (!kgdboe) { + tty_putDebugChar(c); + } else { + eth_putDebugChar(c); + } +} + +int +getDebugChar(void) +{ + if (!kgdboe) { + return tty_getDebugChar(); + } else { + return eth_getDebugChar(); + } +} + +void +flushDebugChar(void) +{ + if (!kgdboe) { + tty_flushDebugChar(); + } else { + eth_flushDebugChar(); + } +} + +/* + * Gdb calls functions by pushing agruments, including a return address + * on the stack and the adjusting EIP to point to the function. The + * whole assumption in GDB is that we are on a different stack than the + * one the "user" i.e. code that hit the break point, is on. This, of + * course is not true in the kernel. Thus various dodges are needed to + * do the call without directly messing with EIP (which we can not change + * as it is just a location and not a register. To adjust it would then + * require that we move every thing below EIP up or down as needed. This + * will not work as we may well have stack relative pointer on the stack + * (such as the pointer to regs, for example). + + * So here is what we do: + * We detect gdb attempting to store into the stack area and instead, store + * into the fn_call_lookaside.array at the same relative location as if it + * were the area ESP pointed at. We also trap ESP modifications + * and uses these to adjust fn_call_lookaside.esp. On entry + * fn_call_lookaside.esp will be set to point at the last entry in + * fn_call_lookaside.array. This allows us to check if it has changed, and + * if so, on exit, we add the registers we will use to do the move and a + * trap/ interrupt return exit sequence. We then adjust the eflags in the + * regs array (remember we now have a copy in the fn_call_lookaside.array) to + * kill the interrupt bit, AND we change EIP to point at our set up stub. + * As part of the register set up we preset the registers to point at the + * begining and end of the fn_call_lookaside.array, so all the stub needs to + * do is move words from the array to the stack until ESP= the desired value + * then do the rti. This will then transfer to the desired function with + * all the correct registers. Nifty huh? + */ +extern asmlinkage void fn_call_stub(void); +extern asmlinkage void fn_rtn_stub(void); +/* *INDENT-OFF* */ +__asm__("fn_rtn_stub:\n\t" + "movq %rax,%rsp\n\t" + "fn_call_stub:\n\t" + "1:\n\t" + "addq $-8,%rbx\n\t" + "movq (%rbx), %rax\n\t" + "pushq %rax\n\t" + "cmpq %rsp,%rcx\n\t" + "jne 1b\n\t" + "popq %rax\n\t" + "popq %rbx\n\t" + "popq %rcx\n\t" + "iret \n\t"); +/* *INDENT-ON* */ +#define gdb_i386vector kgdb_info.vector +#define gdb_i386errcode kgdb_info.errcode +#define waiting_cpus kgdb_info.cpus_waiting +#define remote_debug kgdb_info.print_debug_info +#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold +/* gdb locks */ + +#ifdef CONFIG_SMP +static int in_kgdb_called; +static spinlock_t waitlocks[MAX_NO_CPUS] = + {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; +/* + * The following array has the thread pointer of each of the "other" + * cpus. We make it global so it can be seen by gdb. + */ +volatile int in_kgdb_entry_log[MAX_NO_CPUS]; +volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; +/* +static spinlock_t continuelocks[MAX_NO_CPUS]; +*/ +spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; +/* waiters on our spinlock plus us */ +static atomic_t spinlock_waiters = ATOMIC_INIT(1); +static int spinlock_count = 0; +static int spinlock_cpu = 0; +/* + * Note we use nested spin locks to account for the case where a break + * point is encountered when calling a function by user direction from + * kgdb. Also there is the memory exception recursion to account for. + * Well, yes, but this lets other cpus thru too. Lets add a + * cpu id to the lock. + */ +#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ + spinlock_cpu != smp_processor_id()){\ + atomic_inc(&spinlock_waiters); \ + while (! spin_trylock(x)) {\ + in_kgdb(®s);\ + }\ + atomic_dec(&spinlock_waiters); \ + spinlock_count = 1; \ + spinlock_cpu = smp_processor_id(); \ + }else{ \ + spinlock_count++; \ + } +#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) +#else +unsigned kgdb_spinlock = 0; +#define KGDB_SPIN_LOCK(x) --*x +#define KGDB_SPIN_UNLOCK(x) ++*x +#endif + +int +hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return (ch - 'a' + 10); + if ((ch >= '0') && (ch <= '9')) + return (ch - '0'); + if ((ch >= 'A') && (ch <= 'F')) + return (ch - 'A' + 10); + return (-1); +} + +/* scan for the sequence $# */ +void +getpacket(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + char ch; + + do { + /* wait around for the start character, ignore all other characters */ + while ((ch = (getDebugChar() & 0x7f)) != '$') ; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < BUFMAX) { + ch = getDebugChar() & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(getDebugChar() & 0x7f) << 4; + xmitcsum += hex(getDebugChar() & 0x7f); + if ((remote_debug) && (checksum != xmitcsum)) { + printk + ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", + checksum, xmitcsum, buffer); + } + + if (checksum != xmitcsum) + putDebugChar('-'); /* failed checksum */ + else { + putDebugChar('+'); /* successful transfer */ + /* if a sequence char is present, reply the sequence ID */ + if (buffer[2] == ':') { + putDebugChar(buffer[0]); + putDebugChar(buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(buffer); + for (i = 3; i <= count; i++) + buffer[i - 3] = buffer[i]; + } + } + } + } while (checksum != xmitcsum); + + if (remote_debug) + printk("R:%s\n", buffer); + flushDebugChar(); +} + +/* send the packet in buffer. */ + +void +putpacket(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + /* $#. */ + + if (!kgdboe) { + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + putDebugChar(ch); + checksum += ch; + count += 1; + } + + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + + } while ((getDebugChar() & 0x7f) != '+'); + } else { + /* + * For udp, we can not transfer too much bytes once. + * We only transfer MAX_SEND_COUNT size bytes each time + */ + +#define MAX_SEND_COUNT 30 + + int send_count = 0, i = 0; + char send_buf[MAX_SEND_COUNT]; + + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + send_count = 0; + while ((ch = buffer[count])) { + if (send_count >= MAX_SEND_COUNT) { + for(i = 0; i < MAX_SEND_COUNT; i++) { + putDebugChar(send_buf[i]); + } + flushDebugChar(); + send_count = 0; + } else { + send_buf[send_count] = ch; + checksum += ch; + count ++; + send_count++; + } + } + for(i = 0; i < send_count; i++) + putDebugChar(send_buf[i]); + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + } while ((getDebugChar() & 0x7f) != '+'); + } +} + +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; +static char lbuf[BUFMAX]; +static short error; + +void +debug_error(char *format, char *parm) +{ + if (remote_debug) + printk(format, parm); +} + +static void +print_regs(struct pt_regs *regs) +{ + printk("RAX=%016lx RBX=%016lx RCX=%016lx\n", + regs->rax, regs->rbx, regs->rcx); + printk("RDX=%016lx RSI=%016lx RDI=%016lx\n", + regs->rdx, regs->rsi, regs->rdi); + printk("RBP=%016lx PS=%016lx PC=%016lx\n", + regs->rbp, regs->eflags, regs->rip); + printk("R8=%016lx R9=%016lx R10=%016lx\n", + regs->r8, regs->r9, regs->r10); + printk("R11=%016lx R12=%016lx R13=%016lx\n", + regs->r11, regs->r12, regs->r13); + printk("R14=%016lx R15=%016lx RSP=%016lx\n", + regs->r14, regs->r15, regs->rsp); +} + +#define NEW_esp fn_call_lookaside[trap_cpu].rsp + +static void +regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_RAX] = regs->rax; + gdb_regs[_RBX] = regs->rbx; + gdb_regs[_RCX] = regs->rcx; + gdb_regs[_RDX] = regs->rdx; + gdb_regs[_RSI] = regs->rsi; + gdb_regs[_RDI] = regs->rdi; + gdb_regs[_RBP] = regs->rbp; + gdb_regs[ _PS] = regs->eflags; + gdb_regs[ _PC] = regs->rip; + gdb_regs[ _R8] = regs->r8; + gdb_regs[ _R9] = regs->r9; + gdb_regs[_R10] = regs->r10; + gdb_regs[_R11] = regs->r11; + gdb_regs[_R12] = regs->r12; + gdb_regs[_R13] = regs->r13; + gdb_regs[_R14] = regs->r14; + gdb_regs[_R15] = regs->r15; + gdb_regs[_RSP] = regs->rsp; + + /* Note, as we are a debugging the kernel, we will always + * trap in kernel code, this means no priviledge change, + * and so the pt_regs structure is not completely valid. In a non + * privilege change trap, only EFLAGS, CS and EIP are put on the stack, + * SS and ESP are not stacked, this means that the last 2 elements of + * pt_regs is not valid (they would normally refer to the user stack) + * also, using regs+1 is no good because you end up will a value that is + * 2 longs (8) too high. This used to cause stepping over functions + * to fail, so my fix is to use the address of regs->esp, which + * should point at the end of the stack frame. Note I have ignored + * completely exceptions that cause an error code to be stacked, such + * as double fault. Stuart Hughes, Zentropix. + * original code: gdb_regs[_ESP] = (int) (regs + 1) ; + + * this is now done on entry and moved to OLD_esp (as well as NEW_esp). + */ +} + +static void +gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + regs->rax = gdb_regs[_RAX] ; + regs->rbx = gdb_regs[_RBX] ; + regs->rcx = gdb_regs[_RCX] ; + regs->rdx = gdb_regs[_RDX] ; + regs->rsi = gdb_regs[_RSI] ; + regs->rdi = gdb_regs[_RDI] ; + regs->rbp = gdb_regs[_RBP] ; + regs->eflags = gdb_regs[ _PS] ; + regs->rip = gdb_regs[ _PC] ; + regs->r8 = gdb_regs[ _R8] ; + regs->r9 = gdb_regs[ _R9] ; + regs->r10 = gdb_regs[ _R10] ; + regs->r11 = gdb_regs[ _R11] ; + regs->r12 = gdb_regs[ _R12] ; + regs->r13 = gdb_regs[ _R13] ; + regs->r14 = gdb_regs[ _R14] ; + regs->r15 = gdb_regs[ _R15] ; + #if 0 /* can't change these */ + regs->rsp = gdb_regs[_RSP] ; + regs->ss = gdb_regs[ _SS] ; + regs->fs = gdb_regs[_FS]; + regs->gs = gdb_regs[_GS]; +#endif +} /* gdb_regs_to_regs */ + +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched ((unsigned long) scheduling_functions_start_here) +#define last_sched ((unsigned long) scheduling_functions_end_here) + +int thread_list = 0; +extern void thread_return(void); + +void +get_gdb_regs(struct task_struct *p, struct pt_regs *regs, unsigned long *gdb_regs) +{ + unsigned long **rbp, *rsp, *rsp0, pc; + int count = 0; + IF_SMP(int i); + if (!p || p == current) { + regs_to_gdb_regs(gdb_regs, regs); + return; + } +#ifdef CONFIG_SMP + for (i = 0; i < MAX_NO_CPUS; i++) { + if (p == kgdb_info.cpus_waiting[i].task) { + regs_to_gdb_regs(gdb_regs, + kgdb_info.cpus_waiting[i].regs); + gdb_regs[_RSP] = + (unsigned long)&kgdb_info.cpus_waiting[i].regs->rsp; + + return; + } + } +#endif + memset(gdb_regs, 0, NUMREGBYTES); + rsp = (unsigned long *)p->thread.rsp; + rbp = (unsigned long **)rsp[0]; + rsp += 2; + gdb_regs[_PC] = (unsigned long)thread_return; + gdb_regs[_RBP] = (unsigned long)rbp; + gdb_regs[_RSP] = (unsigned long)rsp; + +/* + * This code is to give a more informative notion of where a process + * is waiting. It is used only when the user asks for a thread info + * list. If he then switches to the thread, s/he will find the task + * is in schedule, but a back trace should show the same info we come + * up with. This code was shamelessly purloined from process.c. It was + * then enhanced to provide more registers than simply the program + * counter. + */ + + if (!thread_list) { + return; + } + + if (p->state == TASK_RUNNING) + return; + rsp0 = (unsigned long *)p->thread.rsp0; + if (rsp < (unsigned long *) p->thread_info || rsp > rsp0) + return; + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + do { + if (*rbp < rsp || *rbp > rsp0) + break; + rbp = (unsigned long **)*rbp; + rsp = (unsigned long *)rbp; + pc = rsp[1]; + + if (pc < first_sched || pc >= last_sched) + break; + gdb_regs[_PC] = (unsigned long)pc; + gdb_regs[_RSP] = (unsigned long)rsp; + gdb_regs[_RBP] = (unsigned long)rbp; + } while (count++ < 16); + return; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* returns nonzero if any memory access fails. */ +int mem2hex( char* mem, char* buf, int count) +{ + int i; + unsigned char ch; + int ret = 0; + + for (i=0;i> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (ret) { + Dearly_printk("mem2hex: fault at accessing %p\n", mem); + } + return(ret); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return nonzero if any memory access fails. */ +int hex2mem( char* buf, char* mem, int count) +{ + int i; + unsigned char ch; + int ret = 0; + + for (i=0;i (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { + addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); + } + *addr = val; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* return a pointer to the last char put in buf (null) */ +/* If MAY_FAULT is non-zero, then we should set mem_err in response to + a fault; if zero treat a fault like any other fault in the stub. */ +char * +mem2hex(char *mem, char *buf, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + /* printk("%lx = ", mem) ; */ + + ch = get_char(mem++); + + /* printk("%02x\n", ch & 0xFF) ; */ + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault fetching from addr %lx\n", + (long) (mem - 1)); + *buf = 0; /* truncate buffer */ + return (buf); + } + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (may_fault) + mem_err_expected = 0; + return (buf); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return a pointer to the character AFTER the last byte written */ +/* NOTE: We use the may fault flag to also indicate if the write is to + * the registers (0) or "other" memory (!=0) + */ +char * +hex2mem(char *buf, char *mem, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + ch = hex(*buf++) << 4; + ch = ch + hex(*buf++); + set_char(mem++, ch, may_fault); + + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault storing to addr %lx\n", + (long) (mem - 1)); + return (mem); + } + } + if (may_fault) + mem_err_expected = 0; + return (mem); +} +#endif + +/**********************************************/ +/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ +/* RETURN NUMBER OF CHARS PROCESSED */ +/**********************************************/ +int +hexToLong(char **ptr, unsigned long *value) +{ + int numChars = 0; + int hexValue; + + *value = 0; + + while (**ptr) { + hexValue = hex(**ptr); + if (hexValue >= 0) { + *value = (*value << 4) | hexValue; + numChars++; + } else + break; + + (*ptr)++; + } + + return (numChars); +} + +#define stubhex(h) hex(h) +#ifdef old_thread_list + +static int +stub_unpack_int(char *buff, int fieldlength) +{ + int nibble; + int retval = 0; + + while (fieldlength) { + nibble = stubhex(*buff++); + retval |= nibble; + fieldlength--; + if (fieldlength) + retval = retval << 4; + } + return retval; +} +#endif +static char * +pack_hex_byte(char *pkt, int byte) +{ + *pkt++ = hexchars[(byte >> 4) & 0xf]; + *pkt++ = hexchars[(byte & 0xf)]; + return pkt; +} + +#define BUF_THREAD_ID_SIZE 16 + +static char * +pack_threadid(char *pkt, threadref * id) +{ + char *limit; + unsigned char *altid; + + altid = (unsigned char *) id; + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *altid++); + return pkt; +} + +#ifdef old_thread_list +static char * +unpack_byte(char *buf, int *value) +{ + *value = stub_unpack_int(buf, 2); + return buf + 2; +} + +static char * +unpack_threadid(char *inbuf, threadref * id) +{ + char *altref; + char *limit = inbuf + BUF_THREAD_ID_SIZE; + int x, y; + + altref = (char *) id; + + while (inbuf < limit) { + x = stubhex(*inbuf++); + y = stubhex(*inbuf++); + *altref++ = (x << 4) | y; + } + return inbuf; +} +#endif +void +int_to_threadref(threadref * id, int value) +{ + unsigned char *scan; + + scan = (unsigned char *) id; + { + int i = 4; + while (i--) + *scan++ = 0; + } + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} +int +int_to_hex_v(unsigned char * id, int value) +{ + unsigned char *start = id; + int shift; + int ch; + + for (shift = 28; shift >= 0; shift -= 4) { + if ((ch = (value >> shift) & 0xf) || (id != start)) { + *id = hexchars[ch]; + id++; + } + } + if (id == start) + *id++ = '0'; + return id - start; +} +#ifdef old_thread_list + +static int +threadref_to_int(threadref * ref) +{ + int i, value = 0; + unsigned char *scan; + + scan = (char *) ref; + scan += 4; + i = 4; + while (i-- > 0) + value = (value << 8) | ((*scan++) & 0xff); + return value; +} +#endif +static int +cmp_str(char *s1, char *s2, int count) +{ + while (count--) { + if (*s1++ != *s2++) + return 0; + } + return 1; +} + +#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ +extern struct task_struct *kgdb_get_idle(int cpu); +#define idle_task(cpu) kgdb_get_idle(cpu) +#else +#define idle_task(cpu) init_tasks[cpu] +#endif + +extern int kgdb_pid_init_done; + +struct task_struct * +getthread(int pid) +{ + struct task_struct *thread; + if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { + if (!cpu_online(pid - PID_MAX)) + return NULL; + + return idle_task(pid - PID_MAX); + } else { + /* + * find_task_by_pid is relatively safe all the time + * Other pid functions require lock downs which imply + * that we may be interrupting them (as we get here + * in the middle of most any lock down). + * Still we don't want to call until the table exists! + */ + if (kgdb_pid_init_done){ + thread = find_task_by_pid(pid); + if (thread) { + return thread; + } + } + } + return NULL; +} +/* *INDENT-OFF* */ +struct hw_breakpoint { + unsigned enabled; + unsigned type; + unsigned len; + unsigned long addr; +} breakinfo[4] = { {enabled:0}, + {enabled:0}, + {enabled:0}, + {enabled:0}}; +/* *INDENT-ON* */ +unsigned long hw_breakpoint_status; +void +correct_hw_break(void) +{ + int breakno; + int correctit; + int breakbit; + unsigned long dr7; + + asm volatile ("movq %%db7, %0\n":"=r" (dr7) + :); + /* *INDENT-OFF* */ + do { + unsigned long addr0, addr1, addr2, addr3; + asm volatile ("movq %%db0, %0\n" + "movq %%db1, %1\n" + "movq %%db2, %2\n" + "movq %%db3, %3\n" + :"=r" (addr0), "=r"(addr1), + "=r"(addr2), "=r"(addr3) + :); + } while (0); + /* *INDENT-ON* */ + correctit = 0; + for (breakno = 0; breakno < 3; breakno++) { + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + dr7 |= (((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << 16) << + (breakno << 2); + switch (breakno) { + case 0: + asm volatile ("movq %0, %%dr0\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 1: + asm volatile ("movq %0, %%dr1\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 2: + asm volatile ("movq %0, %%dr2\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 3: + asm volatile ("movq %0, %%dr3\n"::"r" + (breakinfo[breakno].addr)); + break; + } + } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + } + } + if (correctit) { + asm volatile ("movq %0, %%db7\n"::"r" (dr7)); + } +} + +int +remove_hw_break(unsigned breakno) +{ + if (!breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 0; + return 0; +} + +int +set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) +{ + if (breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 1; + breakinfo[breakno].type = type; + breakinfo[breakno].len = len; + breakinfo[breakno].addr = addr; + return 0; +} + +#ifdef CONFIG_SMP +static int in_kgdb_console = 0; + +int +in_kgdb(struct pt_regs *regs) +{ + unsigned long flags; + int cpu; + if (!kgdb_enabled) + return 0; + cpu = smp_processor_id(); + in_kgdb_called = 1; + if (!spin_is_locked(&kgdb_spinlock)) { + if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ + in_kgdb_console) { /* or we are doing slow i/o */ + return 1; + } + return 0; + } + + /* As I see it the only reason not to let all cpus spin on + * the same spin_lock is to allow selected ones to proceed. + * This would be a good thing, so we leave it this way. + * Maybe someday.... Done ! + + * in_kgdb() is called from an NMI so we don't pretend + * to have any resources, like printk() for example. + */ + + local_irq_save(flags); /* only local here, to avoid hanging */ + /* + * log arival of this cpu + * The NMI keeps on ticking. Protect against recurring more + * than once, and ignor the cpu that has the kgdb lock + */ + in_kgdb_entry_log[cpu]++; + in_kgdb_here_log[cpu] = regs; + if (cpu == spinlock_cpu || waiting_cpus[cpu].task) + goto exit_in_kgdb; + + /* + * For protection of the initilization of the spin locks by kgdb + * it locks the kgdb spinlock before it gets the wait locks set + * up. We wait here for the wait lock to be taken. If the + * kgdb lock goes away first?? Well, it could be a slow exit + * sequence where the wait lock is removed prior to the kgdb lock + * so if kgdb gets unlocked, we just exit. + */ + + while (spin_is_locked(&kgdb_spinlock) && + !spin_is_locked(waitlocks + cpu)) ; + if (!spin_is_locked(&kgdb_spinlock)) + goto exit_in_kgdb; + + waiting_cpus[cpu].task = current; + waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu); + waiting_cpus[cpu].regs = regs; + + spin_unlock_wait(waitlocks + cpu); + + /* + * log departure of this cpu + */ + waiting_cpus[cpu].task = 0; + waiting_cpus[cpu].pid = 0; + waiting_cpus[cpu].regs = 0; + correct_hw_break(); + exit_in_kgdb: + in_kgdb_here_log[cpu] = 0; + local_irq_restore(flags); + return 1; + /* + spin_unlock(continuelocks + smp_processor_id()); + */ +} + +void +smp__in_kgdb(struct pt_regs regs) +{ + ack_APIC_irq(); + in_kgdb(®s); +} +#else +int +in_kgdb(struct pt_regs *regs) +{ + return (kgdb_spinlock); +} +#endif + +void +printexceptioninfo(int exceptionNo, int errorcode, char *buffer) +{ + unsigned long dr6; + int i; + switch (exceptionNo) { + case 1: /* debug exception */ + break; + case 3: /* breakpoint */ + sprintf(buffer, "Software breakpoint"); + return; + default: + sprintf(buffer, "Details not available"); + return; + } + asm volatile ("movq %%db6, %0\n":"=r" (dr6) + :); + if (dr6 & 0x4000) { + sprintf(buffer, "Single step"); + return; + } + for (i = 0; i < 4; ++i) { + if (dr6 & (1 << i)) { + sprintf(buffer, "Hardware breakpoint %d", i); + return; + } + } + sprintf(buffer, "Unknown trap"); + return; +} + +/* + * The ThreadExtraInfo query allows us to pass an arbitrary string + * for display with the "info threads" command. + */ + +void +print_extra_info(task_t *p, char *buf) +{ + if (!p) { + sprintf(buf, "Invalid thread"); + return; + } + sprintf(buf, "0x%p %8d %4d %c %s", + (void *)p, p->parent->pid, + task_cpu(p), + (p->state == 0) ? (task_curr(p)?'R':'r') : + (p->state < 0) ? 'U' : + (p->state & TASK_UNINTERRUPTIBLE) ? 'D' : + (p->state & TASK_STOPPED || p->ptrace & PT_PTRACED) ? 'T' : + (p->state & (TASK_ZOMBIE | TASK_DEAD)) ? 'Z' : + (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?', + p->comm); +} + +/* + * This function does all command procesing for interfacing to gdb. + * + * NOTE: The INT nn instruction leaves the state of the interrupt + * enable flag UNCHANGED. That means that when this routine + * is entered via a breakpoint (INT 3) instruction from code + * that has interrupts enabled, then interrupts will STILL BE + * enabled when this routine is entered. The first thing that + * we do here is disable interrupts so as to prevent recursive + * entries and bothersome serial interrupts while we are + * trying to run the serial port in polled mode. + * + * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so + * it is always necessary to do a restore_flags before returning + * so as to let go of that lock. + */ +int +kgdb_handle_exception(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs) +{ + struct task_struct *usethread = NULL; + struct task_struct *thread_list_start = 0, *thread = NULL; + struct task_struct *p; + unsigned long addr, length; + unsigned long breakno, breaktype; + char *ptr; + unsigned long newPC; + threadref thref; + unsigned long threadid, tmpid; + int thread_min = PID_MAX + MAX_NO_CPUS; +#ifdef old_thread_list + int maxthreads; +#endif + int nothreads; + unsigned long flags; + unsigned long gdb_regs[NUMREGS]; + unsigned long dr6; + IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ +#define NO_NMI 1 +#define NO_SYNC 2 +#define regs (*linux_regs) + /* + * If the entry is not from the kernel then return to the Linux + * trap handler and let it process the interrupt normally. + */ + if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->cs)) { + printk("ignoring non-kernel exception\n"); + print_regs(®s); + return (0); + } + /* + * If we're using eth mode, set the 'mode' in the netdevice. + */ + + if (kgdboe) + netpoll_set_trap(1); + + local_irq_save(flags); + + /* Get kgdb spinlock */ + + KGDB_SPIN_LOCK(&kgdb_spinlock); + rdtscll(kgdb_info.entry_tsc); + /* + * We depend on this spinlock and the NMI watch dog to control the + * other cpus. They will arrive at "in_kgdb()" as a result of the + * NMI and will wait there for the following spin locks to be + * released. + */ +#ifdef CONFIG_SMP + +#if 0 + if (cpu_callout_map & ~MAX_CPU_MASK) { + printk("kgdb : too many cpus, possibly not mapped" + " in contiguous space, change MAX_NO_CPUS" + " in kgdb_stub and make new kernel.\n" + " cpu_callout_map is %lx\n", cpu_callout_map); + goto exit_just_unlock; + } +#endif + if (spinlock_count == 1) { + int time, end_time, dum; + int i; + int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) + }; + if (remote_debug) { + printk("kgdb : cpu %d entry, syncing others\n", + smp_processor_id()); + } + for (i = 0; i < MAX_NO_CPUS; i++) { + /* + * Use trylock as we may already hold the lock if + * we are holding the cpu. Net result is all + * locked. + */ + spin_trylock(&waitlocks[i]); + } + for (i = 0; i < MAX_NO_CPUS; i++) + cpu_logged_in[i] = 0; + /* + * Wait for their arrival. We know the watch dog is active if + * in_kgdb() has ever been called, as it is always called on a + * watchdog tick. + */ + rdtsc(dum, time); + end_time = time + 2; /* Note: we use the High order bits! */ + i = 1; + if (num_online_cpus() > 1) { + int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; + smp_send_nmi_allbutself(); + + while (i < num_online_cpus() && time != end_time) { + int j; + for (j = 0; j < MAX_NO_CPUS; j++) { + if (waiting_cpus[j].task && + waiting_cpus[j].task != NOCPU && + !cpu_logged_in[j]) { + i++; + cpu_logged_in[j] = 1; + if (remote_debug) { + printk + ("kgdb : cpu %d arrived at kgdb\n", + j); + } + break; + } else if (!waiting_cpus[j].task && + !cpu_online(j)) { + waiting_cpus[j].task = NOCPU; + cpu_logged_in[j] = 1; + waiting_cpus[j].hold = 1; + break; + } + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + + int wait = 100000; + while (wait--) ; + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + printk + ("kgdb : cpu %d stall" + " in in_kgdb\n", + j); + i++; + cpu_logged_in[j] = 1; + waiting_cpus[j].task = + (struct task_struct + *) 1; + } + } + } + + if (in_kgdb_entry_log[smp_processor_id()] > + (me_in_kgdb + 10)) { + break; + } + + rdtsc(dum, time); + } + if (i < num_online_cpus()) { + printk + ("kgdb : time out, proceeding without sync\n"); +#if 0 + printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", + waiting_cpus[0].task != 0, + waiting_cpus[1].task != 0); + printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", + cpu_logged_in[0], cpu_logged_in[1]); + printk + ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", + in_kgdb_here_log[0] != 0, + in_kgdb_here_log[1] != 0); +#endif + entry_state = NO_SYNC; + } else { +#if 0 + int ent = + in_kgdb_entry_log[smp_processor_id()] - + me_in_kgdb; + printk("kgdb : sync after %d entries\n", ent); +#endif + } + } else { + if (remote_debug) { + printk + ("kgdb : %d cpus, but watchdog not active\n" + "proceeding without locking down other cpus\n", + num_online_cpus()); + entry_state = NO_NMI; + } + } + } +#endif + + if (remote_debug) { + unsigned long *lp = (unsigned long *) &linux_regs; + + printk("handle_exception(exceptionVector=%d, " + "signo=%d, err_code=%d, linux_regs=%p)\n", + exceptionVector, signo, err_code, linux_regs); + if (debug_regs) { + print_regs(®s); + printk("Stk: %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[0], lp[1], lp[2], lp[3], + lp[4], lp[5], lp[6], lp[7]); + printk(" %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[8], lp[9], lp[10], lp[11], + lp[12], lp[13], lp[14], lp[15]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[16], lp[17], lp[18], lp[19], + lp[20], lp[21], lp[22], lp[23]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[24], lp[25], lp[26], lp[27], + lp[28], lp[29], lp[30], lp[31]); + } + } + + /* Disable hardware debugging while we are in kgdb */ + /* Get the debug register status register */ +/* *INDENT-OFF* */ + __asm__("movq %0,%%db7" + : /* no output */ + :"r"(0UL)); + + asm volatile ("movq %%db6, %0\n" + :"=r" (hw_breakpoint_status) + :); + +#if 0 +/* *INDENT-ON* */ + switch (exceptionVector) { + case 0: /* divide error */ + case 1: /* debug exception */ + case 2: /* NMI */ + case 3: /* breakpoint */ + case 4: /* overflow */ + case 5: /* bounds check */ + case 6: /* invalid opcode */ + case 7: /* device not available */ + case 8: /* double fault (errcode) */ + case 10: /* invalid TSS (errcode) */ + case 12: /* stack fault (errcode) */ + case 16: /* floating point error */ + case 17: /* alignment check (errcode) */ + default: /* any undocumented */ + break; + case 11: /* segment not present (errcode) */ + case 13: /* general protection (errcode) */ + case 14: /* page fault (special errcode) */ + case 19: /* cache flush denied */ + if (mem_err_expected) { + /* + * This fault occured because of the + * get_char or set_char routines. These + * two routines use either eax of edx to + * indirectly reference the location in + * memory that they are working with. + * For a page fault, when we return the + * instruction will be retried, so we + * have to make sure that these + * registers point to valid memory. + */ + mem_err = 1; /* set mem error flag */ + mem_err_expected = 0; + mem_err_cnt++; /* helps in debugging */ + /* make valid address */ + regs.eax = (long) &garbage_loc; + /* make valid address */ + regs.edx = (long) &garbage_loc; + if (remote_debug) + printk("Return after memory error: " + "mem_err_cnt=%d\n", mem_err_cnt); + if (debug_regs) + print_regs(®s); + goto exit_kgdb; + } + break; + } +#endif + if (remote_debug) + printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); + + gdb_i386vector = exceptionVector; + gdb_i386errcode = err_code; + kgdb_info.called_from = __builtin_return_address(0); +#ifdef CONFIG_SMP + /* + * OK, we can now communicate, lets tell gdb about the sync. + * but only if we had a problem. + */ + switch (entry_state) { + case NO_NMI: + to_gdb("NMI not active, other cpus not stopped\n"); + break; + case NO_SYNC: + to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); + default:; + } + +#endif +/* + * Set up the gdb function call area. + */ + trap_cpu = smp_processor_id(); + OLD_esp = NEW_esp = (unsigned long) (&linux_regs->rsp); + + IF_SMP(once_again:) + /* reply to host that an exception has occurred */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + + putpacket(remcomOutBuffer); + + while (1 == 1) { + error = 0; + remcomOutBuffer[0] = 0; + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + break; + case 'd': + remote_debug = !(remote_debug); /* toggle debug flag */ + printk("Remote debug %s\n", + remote_debug ? "on" : "off"); + break; + case 'g': /* return the value of the CPU registers */ + get_gdb_regs(usethread, ®s, gdb_regs); + mem2hex((char *) gdb_regs, + remcomOutBuffer, NUMREGBYTES); + break; + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem(&remcomInBuffer[1], + (char *) gdb_regs, NUMREGBYTES); + if (!usethread || usethread == current) { + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E00"); + } + break; + + case 'P':{ /* set the value of a single CPU register - + return OK */ + /* + * For some reason, gdb wants to talk about psudo + * registers (greater than 15). + */ + unsigned long regno; + + ptr = &remcomInBuffer[1]; + regs_to_gdb_regs(gdb_regs, ®s); + if ((!usethread || usethread == current) && + hexToLong(&ptr, ®no) && + *ptr++ == '=' && (regno >= 0)) { + if (regno >= NUMREGS) + break; + hex2mem(ptr, (char *) &gdb_regs[regno], + 8); + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + break; + } + strcpy(remcomOutBuffer, "E01"); + break; + } + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr, &addr) && + (*(ptr++) == ',') && (hexToLong(&ptr, &length))) { + ptr = 0; + /* + * hex doubles the byte count + */ + if (length > (BUFMAX / 2)) + length = BUFMAX / 2; + if (mem2hex((char *) addr, + remcomOutBuffer, length)) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } + } + + if (ptr) { + strcpy(remcomOutBuffer, "E01"); + debug_error + ("malformed read memory command: %s\n", + remcomInBuffer); + } + break; + + /* MAA..AA,LLLL: + Write LLLL bytes at address AA.AA return OK */ + case 'M': + /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr, &addr) && + (*(ptr++) == ',') && + (hexToLong(&ptr, &length)) && (*(ptr++) == ':')) { + if (hex2mem(ptr, (char *) addr, length)) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } else { + strcpy(remcomOutBuffer, "OK"); + } + + ptr = 0; + } + if (ptr) { + strcpy(remcomOutBuffer, "E02"); + debug_error + ("malformed write memory command: %s\n", + remcomInBuffer); + } + break; + case 'S': + remcomInBuffer[0] = 's'; + case 'C': + /* Csig;AA..AA where ;AA..AA is optional + * continue with signal + * Since signals are meaning less to us, delete that + * part and then fall into the 'c' code. + */ + ptr = &remcomInBuffer[1]; + length = 2; + while (*ptr && *ptr != ';') { + length++; + ptr++; + } + if (*ptr) { + do { + ptr++; + *(ptr - length++) = *ptr; + } while (*ptr); + } else { + remcomInBuffer[1] = 0; + } + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + /* D detach, reply OK and then continue */ + case 'c': + case 's': + case 'D': + + /* try to read optional parameter, + pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr, &addr)) { + if (remote_debug) + printk("Changing EIP to 0x%lx\n", addr); + + regs.rip = addr; + } + + newPC = regs.rip; + + /* clear the trace bit */ + regs.eflags &= 0xfffffeff; + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + regs.eflags |= 0x100; + + /* detach is a friendly version of continue. Note that + debugging is still enabled (e.g hit control C) + */ + if (remcomInBuffer[0] == 'D') { + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + } + + if (remote_debug) { + printk("Resuming execution\n"); + print_regs(®s); + } + asm volatile ("movq %%db6, %0\n":"=r" (dr6) + :); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno) && + (breakinfo[breakno].type == 0)) { + /* Set restore flag */ + regs.eflags |= 0x10000; + break; + } + } + } + + if (kgdboe) + netpoll_set_trap(0); + + correct_hw_break(); + asm volatile ("movq %0, %%db6\n"::"r" (0UL)); + goto exit_kgdb; + + /* kill the program */ + case 'k': /* do nothing */ + break; + + /* query */ + case 'q': + nothreads = 0; + switch (remcomInBuffer[1]) { + case 'f': + threadid = 1; + thread_list = 2; + thread_list_start = (usethread ? : current); + case 's': + if (!cmp_str(&remcomInBuffer[2], + "ThreadInfo", 10)) + break; + + remcomOutBuffer[nothreads++] = 'm'; + for (; threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + nothreads += int_to_hex_v( + &remcomOutBuffer[ + nothreads], + threadid); + if (thread_min > threadid) + thread_min = threadid; + remcomOutBuffer[ + nothreads] = ','; + nothreads++; + if (nothreads > BUFMAX - 10) + break; + } + } + if (remcomOutBuffer[nothreads - 1] == 'm') { + remcomOutBuffer[nothreads - 1] = 'l'; + } else { + nothreads--; + } + remcomOutBuffer[nothreads] = 0; + break; + +#ifdef old_thread_list /* Old thread info request */ + case 'L': + /* List threads */ + thread_list = 2; + thread_list_start = (usethread ? : current); + unpack_byte(remcomInBuffer + 3, &maxthreads); + unpack_threadid(remcomInBuffer + 5, &thref); + do { + int buf_thread_limit = + (BUFMAX - 22) / BUF_THREAD_ID_SIZE; + if (maxthreads > buf_thread_limit) { + maxthreads = buf_thread_limit; + } + } while (0); + remcomOutBuffer[0] = 'q'; + remcomOutBuffer[1] = 'M'; + remcomOutBuffer[4] = '0'; + pack_threadid(remcomOutBuffer + 5, &thref); + + /* If start flag set start at 0. */ + if (remcomInBuffer[2] == '1') + threadid = 0; + else + threadid = threadref_to_int(&thref); + for (nothreads = 0; + nothreads < maxthreads && + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(remcomOutBuffer + + 21 + + nothreads * 16, + &thref); + nothreads++; + if (thread_min > threadid) + thread_min = threadid; + } + } + + if (threadid == PID_MAX + MAX_NO_CPUS) { + remcomOutBuffer[4] = '1'; + } + pack_hex_byte(remcomOutBuffer + 2, nothreads); + remcomOutBuffer[21 + nothreads * 16] = '\0'; + break; +#endif + case 'C': + /* Current thread id */ + remcomOutBuffer[0] = 'Q'; + remcomOutBuffer[1] = 'C'; + threadid = current->pid; + if (!threadid) { + /* + * idle thread + */ + for (threadid = PID_MAX; + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + if (current == + idle_task(threadid - + PID_MAX)) + break; + } + } + int_to_threadref(&thref, threadid); + pack_threadid(remcomOutBuffer + 2, &thref); + remcomOutBuffer[18] = '\0'; + break; + + case 'E': + /* Print exception info */ + printexceptioninfo(exceptionVector, + err_code, remcomOutBuffer); + break; + case 'T': + ptr = &remcomInBuffer[0]; + if (strncmp(ptr, "qThreadExtraInfo,", + strlen("qThreadExtraInfo,")) == 0) { + ptr += strlen("qThreadExtraInfo,"); + hexToLong(&ptr, &tmpid); + p = getthread(tmpid); + print_extra_info(p, lbuf); + mem2hex(lbuf, remcomOutBuffer, + strlen(lbuf)); + } + break; +#if 0 + case 'T':{ + char * nptr; + /* Thread extra info */ + if (!cmp_str(&remcomInBuffer[2], + "hreadExtraInfo,", 15)) { + break; + } + ptr = &remcomInBuffer[17]; + hexToLong(&ptr, &threadid); + thread = getthread(threadid); + nptr = &thread->comm[0]; + length = 0; + ptr = &remcomOutBuffer[0]; + do { + length++; + ptr = pack_hex_byte(ptr, *nptr++); + } while (*nptr && length < 16); + /* + * would like that 16 to be the size of + * task_struct.comm but don't know the + * syntax.. + */ + *ptr = 0; + } +#endif + } + break; + + /* task related */ + case 'H': + switch (remcomInBuffer[1]) { + case 'g': + ptr = &remcomInBuffer[2]; + hexToLong(&ptr, &threadid); + thread = getthread(threadid); + if (!thread) { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + break; + } + /* + * Just in case I forget what this is all about, + * the "thread info" command to gdb causes it + * to ask for a thread list. It then switches + * to each thread and asks for the registers. + * For this (and only this) usage, we want to + * fudge the registers of tasks not on the run + * list (i.e. waiting) to show the routine that + * called schedule. Also, gdb, is a minimalist + * in that if the current thread is the last + * it will not re-read the info when done. + * This means that in this case we must show + * the real registers. So here is how we do it: + * Each entry we keep track of the min + * thread in the list (the last that gdb will) + * get info for. We also keep track of the + * starting thread. + * "thread_list" is cleared when switching back + * to the min thread if it is was current, or + * if it was not current, thread_list is set + * to 1. When the switch to current comes, + * if thread_list is 1, clear it, else do + * nothing. + */ + usethread = thread; + if ((thread_list == 1) && + (thread == thread_list_start)) { + thread_list = 0; + } + if (thread_list && (threadid == thread_min)) { + if (thread == thread_list_start) { + thread_list = 0; + } else { + thread_list = 1; + } + } + /* follow through */ + case 'c': + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcomInBuffer[1]; + hexToLong(&ptr, &threadid); + thread = getthread(threadid); + if (thread) { + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + if (thread_min > threadid) + thread_min = threadid; + } else { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + } + break; + + case 'Y': /* set up a hardware breakpoint */ + ptr = &remcomInBuffer[1]; + hexToLong(&ptr, &breakno); + ptr++; + hexToLong(&ptr, &breaktype); + ptr++; + hexToLong(&ptr, &length); + ptr++; + hexToLong(&ptr, &addr); + if (set_hw_break(breakno & 0x3, + breaktype & 0x3, + length & 0x3, addr) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + /* Remove hardware breakpoint */ + case 'y': + ptr = &remcomInBuffer[1]; + hexToLong(&ptr, &breakno); + if (remove_hw_break(breakno & 0x3) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + case 'r': /* reboot */ + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + /*to_gdb("Rebooting\n"); */ + /* triplefault no return from here */ + { + static long no_idt[2]; + __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); + BREAKPOINT; + } + + } /* switch */ + + /* reply to the request */ + putpacket(remcomOutBuffer); + } /* while(1==1) */ + /* + * reached by goto only. + */ + exit_kgdb: + /* + * Here is where we set up to trap a gdb function call. NEW_esp + * will be changed if we are trying to do this. We handle both + * adding and subtracting, thus allowing gdb to put grung on + * the stack which it removes later. + */ + if (NEW_esp != OLD_esp) { + unsigned long *ptr = END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) + ptr -= (OLD_esp - NEW_esp) / sizeof (unsigned long); + *--ptr = linux_regs->eflags; + *--ptr = linux_regs->cs; + *--ptr = linux_regs->rip; + *--ptr = linux_regs->rcx; + *--ptr = linux_regs->rbx; + *--ptr = linux_regs->rax; + linux_regs->rcx = NEW_esp - (sizeof (unsigned long) * 6); + linux_regs->rbx = (unsigned long) END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) { + linux_regs->rip = (unsigned long) fn_call_stub; + } else { + linux_regs->rip = (unsigned long) fn_rtn_stub; + linux_regs->rax = NEW_esp; + } + linux_regs->eflags &= ~(IF_BIT | TF_BIT); + } +#ifdef CONFIG_SMP + /* + * Release gdb wait locks + * Sanity check time. Must have at least one cpu to run. Also single + * step must not be done if the current cpu is on hold. + */ + if (spinlock_count == 1) { + int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; + int cpu_avail = 0; + int i; + + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!cpu_online(i)) + break; + if (!hold_cpu(i)) { + cpu_avail = 1; + } + } + /* + * Early in the bring up there will be NO cpus on line... + */ + if (!cpu_avail && !cpus_empty(cpu_online_map)) { + to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); + goto once_again; + } + if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { + to_gdb + ("Current cpu must be unblocked to single step\n"); + goto once_again; + } + if (!(ss_hold)) { + int i; + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!hold_cpu(i)) { + spin_unlock(&waitlocks[i]); + } + } + } else { + spin_unlock(&waitlocks[smp_processor_id()]); + } + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + /* + * If this cpu is on hold, this is where we + * do it. Note, the NMI will pull us out of here, + * but will return as the above lock is not held. + * We will stay here till another cpu releases the lock for us. + */ + spin_unlock_wait(waitlocks + smp_processor_id()); + local_irq_restore(flags); + return (1); + } +#if 0 +exit_just_unlock: +#endif +#endif + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + local_irq_restore(flags); + return (1); +} + +#undef regs +static int kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + struct die_args *d = ptr; + + if (!kgdb_enabled || (cmd == DIE_DEBUG && user_mode(d->regs))) + return NOTIFY_DONE; + if (cmd == DIE_NMI_IPI) { + if (in_kgdb(d->regs)) + return NOTIFY_BAD; + } else if (kgdb_handle_exception(d->trapnr, d->signr, d->err, d->regs)) + return NOTIFY_BAD; /* skip */ + + return NOTIFY_DONE; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, + .priority = 0, +}; + +void set_debug_traps(void) +{ + static int initialized = 0; + + if (!initialized) { + initialized = 1; + notifier_chain_register(&die_chain, &kgdb_notifier); + } +} + +/* + * Provide the command line "gdb" initial break + */ +int __init kgdb_initial_break(char * str) +{ + if (*str == '\0'){ + breakpoint(); + return 1; + } + return 0; +} +__setup("gdb",kgdb_initial_break); + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. */ +/* But really, just use the BREAKPOINT macro. We will handle the int stuff + */ + +void breakpoint(void) +{ + + set_debug_traps(); + kgdb_enabled = 1; +#if 0 + /* + * These calls were not enough to allow breakpoint to be + * called before trap_init(). I moved the argument parsing + * after trap_init() and it seems to work. + */ + set_intr_usr_gate(3,&int3); /* disable ints on trap */ + set_intr_gate(1,&debug); + set_intr_gate(14,&page_fault); +#endif + + BREAKPOINT; +} + +#ifdef later +/* + * possibly we should not go thru the traps.c code at all? Someday. + */ +void +do_kgdb_int3(struct pt_regs *regs, long error_code) +{ + kgdb_handle_exception(3, 5, error_code, regs); + return; +} +#endif +#undef regs +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS +asmlinkage void +bad_sys_call_exit(int stuff) +{ + struct pt_regs *regs = (struct pt_regs *) &stuff; + printk("Sys call %d return with %x preempt_count\n", + (int) regs->orig_eax, preempt_count()); +} +#endif +#ifdef CONFIG_STACK_OVERFLOW_TEST +#include +asmlinkage void +stack_overflow(void) +{ +#ifdef BREAKPOINT + BREAKPOINT; +#else + printk("Kernel stack overflow, looping forever\n"); +#endif + while (1) { + } +} +#endif + +#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) +char gdbconbuf[BUFMAX]; + +static void +kgdb_gdb_message(const char *s, unsigned count) +{ + int i; + int wcount; + char *bufptr; + /* + * This takes care of NMI while spining out chars to gdb + */ + IF_SMP(in_kgdb_console = 1); + gdbconbuf[0] = 'O'; + bufptr = gdbconbuf + 1; + while (count > 0) { + if ((count << 1) > (BUFMAX - 2)) { + wcount = (BUFMAX - 2) >> 1; + } else { + wcount = count; + } + count -= wcount; + for (i = 0; i < wcount; i++) { + bufptr = pack_hex_byte(bufptr, s[i]); + } + *bufptr = '\0'; + s += wcount; + + putpacket(gdbconbuf); + + } + IF_SMP(in_kgdb_console = 0); +} +#endif +#ifdef CONFIG_SMP +static void +to_gdb(const char *s) +{ + int count = 0; + while (s[count] && (count++ < BUFMAX)) ; + kgdb_gdb_message(s, count); +} +#endif +#ifdef CONFIG_KGDB_CONSOLE +#include +#include +#include +#include + +void +kgdb_console_write(struct console *co, const char *s, unsigned count) +{ + + if (gdb_i386vector == -1) { + /* + * We have not yet talked to gdb. What to do... + * lets break, on continue we can do the write. + * But first tell him whats up. Uh, well no can do, + * as this IS the console. Oh well... + * We do need to wait or the messages will be lost. + * Other option would be to tell the above code to + * ignore this breakpoint and do an auto return, + * but that might confuse gdb. Also this happens + * early enough in boot up that we don't have the traps + * set up yet, so... + */ + breakpoint(); + } + kgdb_gdb_message(s, count); +} + +/* + * ------------------------------------------------------------ + * Serial KGDB driver + * ------------------------------------------------------------ + */ + +static struct console kgdbcons = { + name:"kgdb", + write:kgdb_console_write, +#ifdef CONFIG_KGDB_USER_CONSOLE + device:kgdb_console_device, +#endif + flags:CON_PRINTBUFFER | CON_ENABLED, + index:-1, +}; + +/* + * The trick here is that this file gets linked before printk.o + * That means we get to peer at the console info in the command + * line before it does. If we are up, we register, otherwise, + * do nothing. By returning 0, we allow printk to look also. + */ +static int kgdb_console_enabled; + +int __init +kgdb_console_init(char *str) +{ + if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { + register_console(&kgdbcons); + kgdb_console_enabled = 1; + } + return 0; /* let others look at the string */ +} + +__setup("console=", kgdb_console_init); + +#ifdef CONFIG_KGDB_USER_CONSOLE +static kdev_t kgdb_console_device(struct console *c); +/* This stuff sort of works, but it knocks out telnet devices + * we are leaving it here in case we (or you) find time to figure it out + * better.. + */ + +/* + * We need a real char device as well for when the console is opened for user + * space activities. + */ + +static int +kgdb_consdev_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t +kgdb_consdev_write(struct file *file, const char *buf, + size_t count, loff_t * ppos) +{ + int size, ret = 0; + static char kbuf[128]; + static DECLARE_MUTEX(sem); + + /* We are not reentrant... */ + if (down_interruptible(&sem)) + return -ERESTARTSYS; + + while (count > 0) { + /* need to copy the data from user space */ + size = count; + if (size > sizeof (kbuf)) + size = sizeof (kbuf); + if (copy_from_user(kbuf, buf, size)) { + ret = -EFAULT; + break;; + } + kgdb_console_write(&kgdbcons, kbuf, size); + count -= size; + ret += size; + buf += size; + } + + up(&sem); + + return ret; +} + +struct file_operations kgdb_consdev_fops = { + open:kgdb_consdev_open, + write:kgdb_consdev_write +}; +static kdev_t +kgdb_console_device(struct console *c) +{ + return MKDEV(TTYAUX_MAJOR, 1); +} + +/* + * This routine gets called from the serial stub in the i386/lib + * This is so it is done late in bring up (just before the console open). + */ +void +kgdb_console_finit(void) +{ + if (kgdb_console_enabled) { + char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); + char *cp = cptr; + while (*cptr && *cptr != '(') + cptr++; + *cptr = 0; + unregister_chrdev(TTYAUX_MAJOR, cp); + register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); + } +} +#endif +#endif +#ifdef CONFIG_KGDB_TS +#include /* time stamp code */ +#include /* in_interrupt */ +#ifdef CONFIG_KGDB_TS_64 +#define DATA_POINTS 64 +#endif +#ifdef CONFIG_KGDB_TS_128 +#define DATA_POINTS 128 +#endif +#ifdef CONFIG_KGDB_TS_256 +#define DATA_POINTS 256 +#endif +#ifdef CONFIG_KGDB_TS_512 +#define DATA_POINTS 512 +#endif +#ifdef CONFIG_KGDB_TS_1024 +#define DATA_POINTS 1024 +#endif +#ifndef DATA_POINTS +#define DATA_POINTS 128 /* must be a power of two */ +#endif +#define INDEX_MASK (DATA_POINTS - 1) +#if (INDEX_MASK & DATA_POINTS) +#error "CONFIG_KGDB_TS_COUNT must be a power of 2" +#endif +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + int data0; + int data1; +}; +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + struct task_struct *t1; + struct task_struct *t2; +}; +struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; + +struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; +int kgdb_and_then_count; + +void +kgdb_tstamp(int line, char *source, int data0, int data1) +{ + static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; + int flags; + local_irq_save(flags); + spin_lock(&ts_spin); + rdtscll(kgdb_and_then->at_time); +#ifdef CONFIG_SMP + kgdb_and_then->on_cpu = smp_processor_id(); +#endif + kgdb_and_then->task = current; + kgdb_and_then->from_ln = line; + kgdb_and_then->in_src = source; + kgdb_and_then->from = __builtin_return_address(0); + kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | + (preempt_count() << 8)); + kgdb_and_then->data0 = data0; + kgdb_and_then->data1 = data1; + kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; + spin_unlock(&ts_spin); + local_irq_restore(flags); +#ifdef CONFIG_PREEMPT + +#endif + return; +} +#endif +typedef int gdb_debug_hook(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs); +gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ + +static int kgdb_need_breakpoint[NR_CPUS]; + +void kgdb_schedule_breakpoint(void) +{ + kgdb_need_breakpoint[smp_processor_id()] = 1; +} + +void kgdb_process_breakpoint(void) +{ + /* + * Handle a breakpoint queued from inside network driver code + * to avoid reentrancy issues + */ + if (kgdb_need_breakpoint[smp_processor_id()]) { + kgdb_need_breakpoint[smp_processor_id()] = 0; + kgdb_enabled = 1; + BREAKPOINT; + } +} + --- linux-2.6.3-rc2/arch/x86_64/kernel/Makefile 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/Makefile 2004-02-12 00:39:48.000000000 -0800 @@ -24,6 +24,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o a obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_KGDB) += kgdb_stub.o obj-y += topology.o --- linux-2.6.3-rc2/arch/x86_64/kernel/mpparse.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/x86_64/kernel/mpparse.c 2004-02-12 00:39:33.000000000 -0800 @@ -487,7 +487,7 @@ void __init get_smp_config (void) /* * ACPI may be used to obtain the entire SMP configuration or just to - * enumerate/configure processors (CONFIG_ACPI_HT_ONLY). Note that + * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that * ACPI supports both logical (e.g. Hyper-Threading) and physical * processors, where MPS only supports physical. */ --- linux-2.6.3-rc2/arch/x86_64/kernel/smp.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/x86_64/kernel/smp.c 2004-02-12 00:39:48.000000000 -0800 @@ -362,6 +362,18 @@ void smp_send_reschedule(int cpu) send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } +#ifdef CONFIG_KGDB +/* + * By using the NMI code instead of a vector we just sneak thru the + * word generator coming out with just what we want. AND it does + * not matter if clustered_apic_mode is set or not. + */ +void smp_send_nmi_allbutself(void) +{ + send_IPI_allbutself(APIC_DM_NMI); +} +#endif + /* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. --- linux-2.6.3-rc2/arch/x86_64/kernel/sys_x86_64.c 2004-02-09 21:35:51.000000000 -0800 +++ 25/arch/x86_64/kernel/sys_x86_64.c 2004-02-12 00:39:25.000000000 -0800 @@ -110,8 +110,8 @@ arch_get_unmapped_area(struct file *filp if (end - len >= addr && (!vma || addr + len <= vma->vm_start)) return addr; - } else - addr = mm->free_area_cache; + } + addr = mm->free_area_cache; if (addr < begin) addr = begin; start_addr = addr; --- linux-2.6.3-rc2/arch/x86_64/kernel/traps.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/x86_64/kernel/traps.c 2004-02-12 00:39:48.000000000 -0800 @@ -45,6 +45,9 @@ #include #include +#ifdef CONFIG_KGDB +#include +#endif extern struct gate_struct idt_table[256]; --- linux-2.6.3-rc2/arch/x86_64/kernel/x8664_ksyms.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/x86_64/kernel/x8664_ksyms.c 2004-02-12 00:40:52.000000000 -0800 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -180,8 +181,6 @@ EXPORT_SYMBOL_NOVERS(memcpy); EXPORT_SYMBOL_NOVERS(__memcpy); /* syscall export needed for misdesigned sound drivers. */ -extern ssize_t sys_read(unsigned int fd, char * buf, size_t count); -extern off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin); EXPORT_SYMBOL(sys_read); EXPORT_SYMBOL(sys_lseek); EXPORT_SYMBOL(sys_open); --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/x86_64/lib/kgdb_serial.c 2004-02-12 00:39:48.000000000 -0800 @@ -0,0 +1,490 @@ +/* + * Serial interface GDB stub + * + * Written (hacked together) by David Grothe (dave@gcom.com) + * Modified to allow invokation early in boot see also + * kgdb.h for instructions by George Anzinger(george@mvista.com) + * Modified to handle debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_KGDB_USER_CONSOLE +extern void kgdb_console_finit(void); +#endif +#define PRNT_off +#define TEST_EXISTANCE +#ifdef PRNT +#define dbprintk(s) printk s +#else +#define dbprintk(s) +#endif +#define TEST_INTERRUPT_off +#ifdef TEST_INTERRUPT +#define intprintk(s) printk s +#else +#define intprintk(s) +#endif + +#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) + +#define GDB_BUF_SIZE 512 /* power of 2, please */ + +static char gdb_buf[GDB_BUF_SIZE]; +static int gdb_buf_in_inx; +static atomic_t gdb_buf_in_cnt; +static int gdb_buf_out_inx; + +struct async_struct *gdb_async_info; +static int gdb_async_irq; + +#define outb_px(a,b) outb_p(b,a) + +static void program_uart(struct async_struct *info); +static void write_char(struct async_struct *info, int chr); +/* + * Get a byte from the hardware data buffer and return it + */ +static int +read_data_bfr(struct async_struct *info) +{ + char it = inb_p(info->port + UART_LSR); + + if (it & UART_LSR_DR) + return (inb_p(info->port + UART_RX)); + /* + * If we have a framing error assume somebody messed with + * our uart. Reprogram it and send '-' both ways... + */ + if (it & 0xc) { + program_uart(info); + write_char(info, '-'); + return ('-'); + } + return (-1); + +} /* read_data_bfr */ + +/* + * Get a char if available, return -1 if nothing available. + * Empty the receive buffer first, then look at the interface hardware. + + * Locking here is a bit of a problem. We MUST not lock out communication + * if we are trying to talk to gdb about a kgdb entry. ON the other hand + * we can loose chars in the console pass thru if we don't lock. It is also + * possible that we could hold the lock or be waiting for it when kgdb + * NEEDS to talk. Since kgdb locks down the world, it does not need locks. + * We do, of course have possible issues with interrupting a uart operation, + * but we will just depend on the uart status to help keep that straight. + + */ +static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; +#ifdef CONFIG_SMP +extern spinlock_t kgdb_spinlock; +#endif + +static int +read_char(struct async_struct *info) +{ + int chr; + unsigned long flags; + local_irq_save(flags); +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_lock(&uart_interrupt_lock); + } +#endif + if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ + chr = gdb_buf[gdb_buf_out_inx++]; + gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); + atomic_dec(&gdb_buf_in_cnt); + } else { + chr = read_data_bfr(info); + } +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_unlock(&uart_interrupt_lock); + } +#endif + local_irq_restore(flags); + return (chr); +} + +/* + * Wait until the interface can accept a char, then write it. + */ +static void +write_char(struct async_struct *info, int chr) +{ + while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; + + outb_p(chr, info->port + UART_TX); + +} /* write_char */ + +/* + * Mostly we don't need a spinlock, but since the console goes + * thru here with interrutps on, well, we need to catch those + * chars. + */ +/* + * This is the receiver interrupt routine for the GDB stub. + * It will receive a limited number of characters of input + * from the gdb host machine and save them up in a buffer. + * + * When the gdb stub routine tty_getDebugChar() is called it + * draws characters out of the buffer until it is empty and + * then reads directly from the serial port. + * + * We do not attempt to write chars from the interrupt routine + * since the stubs do all of that via tty_putDebugChar() which + * writes one byte after waiting for the interface to become + * ready. + * + * The debug stubs like to run with interrupts disabled since, + * after all, they run as a consequence of a breakpoint in + * the kernel. + * + * Perhaps someone who knows more about the tty driver than I + * care to learn can make this work for any low level serial + * driver. + */ +static irqreturn_t +gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct async_struct *info; + unsigned long flags; + + info = gdb_async_info; + if (!info || !info->tty || irq != gdb_async_irq) + return IRQ_NONE; + + local_irq_save(flags); + spin_lock(&uart_interrupt_lock); + do { + int chr = read_data_bfr(info); + intprintk(("Debug char on int: %x hex\n", chr)); + if (chr < 0) + continue; + + if (chr == 3) { /* Ctrl-C means remote interrupt */ + BREAKPOINT; + continue; + } + + if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { + /* buffer overflow tosses early char */ + read_char(info); + } + gdb_buf[gdb_buf_in_inx++] = chr; + gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); + } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); + spin_unlock(&uart_interrupt_lock); + local_irq_restore(flags); + return IRQ_HANDLED; +} /* gdb_interrupt */ + +/* + * Just a NULL routine for testing. + */ +void +gdb_null(void) +{ +} /* gdb_null */ + +/* These structure are filled in with values defined in asm/kgdb_local.h + */ +static struct serial_state state = SB_STATE; +static struct async_struct local_info = SB_INFO; +static int ok_to_enable_ints = 0; +static void kgdb_enable_ints_now(void); + +extern char *kgdb_version; +/* + * Hook an IRQ for KGDB. + * + * This routine is called from tty_putDebugChar, below. + */ +static int ints_disabled = 1; +int +gdb_hook_interrupt(struct async_struct *info, int verb) +{ + struct serial_state *state = info->state; + unsigned long flags; + int port; +#ifdef TEST_EXISTANCE + int scratch, scratch2; +#endif + + /* The above fails if memory managment is not set up yet. + * Rather than fail the set up, just keep track of the fact + * and pick up the interrupt thing later. + */ + gdb_async_info = info; + port = gdb_async_info->port; + gdb_async_irq = state->irq; + if (verb) { + printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", + kgdb_version, + port, + gdb_async_irq, gdb_async_info->state->custom_divisor); + } + local_irq_save(flags); +#ifdef TEST_EXISTANCE + /* Existance test */ + /* Should not need all this, but just in case.... */ + + scratch = inb_p(port + UART_IER); + outb_px(port + UART_IER, 0); + outb_px(0xff, 0x080); + scratch2 = inb_p(port + UART_IER); + outb_px(port + UART_IER, scratch); + if (scratch2) { + printk + ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); + local_irq_restore(flags); + return 1; /* We failed; there's nothing here */ + } + scratch2 = inb_p(port + UART_LCR); + outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ + outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ + outb_px(port + UART_LCR, 0); + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); + scratch = inb_p(port + UART_IIR) >> 6; + if (scratch == 1) { + printk("gdb_hook_interrupt: Undefined UART type!" + " Not a UART! \n"); + local_irq_restore(flags); + return 1; + } else { + dbprintk(("gdb_hook_interrupt: UART type " + "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); + } + scratch = inb_p(port + UART_MCR); + outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); + outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); + scratch2 = inb_p(port + UART_MSR) & 0xF0; + outb_px(port + UART_MCR, scratch); + if (scratch2 != 0x90) { + printk("gdb_hook_interrupt: " + "Loop back test failed! Not a UART!\n"); + local_irq_restore(flags); + return scratch2 + 1000; /* force 0 to fail */ + } +#endif /* test existance */ + program_uart(info); + local_irq_restore(flags); + + return (0); + +} /* gdb_hook_interrupt */ + +static void +program_uart(struct async_struct *info) +{ + int port = info->port; + + (void) inb_p(port + UART_RX); + outb_px(port + UART_IER, 0); + + (void) inb_p(port + UART_RX); /* serial driver comments say */ + (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ + (void) inb_p(port + UART_MSR); + outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); + outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ + outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ + outb_px(port + UART_MCR, info->MCR); + + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ + outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + return; +} + +/* + * tty_getDebugChar + * + * This is a GDB stub routine. It waits for a character from the + * serial interface and then returns it. If there is no serial + * interface connection then it returns a bogus value which will + * almost certainly cause the system to hang. In the + */ +int kgdb_in_isr = 0; +int kgdb_in_lsr = 0; +extern spinlock_t kgdb_spinlock; + +/* Caller takes needed protections */ + +int +tty_getDebugChar(void) +{ + volatile int chr, dum, time, end_time; + + dbprintk(("tty_getDebugChar(port %x): ", gdb_async_info->port)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + /* + * This trick says if we wait a very long time and get + * no char, return the -1 and let the upper level deal + * with it. + */ + rdtsc(dum, time); + end_time = time + 2; + while (((chr = read_char(gdb_async_info)) == -1) && + (end_time - time) > 0) { + rdtsc(dum, time); + }; + /* + * This covers our butts if some other code messes with + * our uart, hay, it happens :o) + */ + if (chr == -1) + program_uart(gdb_async_info); + + dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); + return (chr); + +} /* tty_getDebugChar */ + +static int count = 3; +static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; + +static int __init +kgdb_enable_ints(void) +{ + set_debug_traps(); + if (kgdboe) { + return 0; + } + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 1); + } + ok_to_enable_ints = 1; + kgdb_enable_ints_now(); +#ifdef CONFIG_KGDB_USER_CONSOLE + kgdb_console_finit(); +#endif + return 0; +} + +#ifdef CONFIG_SERIAL_8250 +void shutdown_for_kgdb(struct async_struct *gdb_async_info); +#endif + +#define kgdb_mem_init_done() (1) + +static void +kgdb_enable_ints_now(void) +{ + if (!spin_trylock(&one_at_atime)) + return; + if (!ints_disabled) + goto exit; + if (kgdb_mem_init_done() && + ints_disabled) { /* don't try till mem init */ +#ifdef CONFIG_SERIAL_8250 + /* + * The ifdef here allows the system to be configured + * without the serial driver. + * Don't make it a module, however, it will steal the port + */ + shutdown_for_kgdb(gdb_async_info); +#endif + ints_disabled = request_irq(gdb_async_info->state->irq, + gdb_interrupt, + IRQ_T(gdb_async_info), + "KGDB-stub", NULL); + intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); + } + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + exit: + spin_unlock(&one_at_atime); +} + +/* + * tty_putDebugChar + * + * This is a GDB stub routine. It waits until the interface is ready + * to transmit a char and then sends it. If there is no serial + * interface connection then it simply returns to its caller, having + * pretended to send the char. Caller takes needed protections. + */ +void +tty_putDebugChar(int chr) +{ + dbprintk(("tty_putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", + gdb_async_info->port, + chr, + chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + + write_char(gdb_async_info, chr); /* this routine will wait */ + count = (chr == '#') ? 0 : count + 1; + if ((count == 2)) { /* try to enable after */ + if (ints_disabled & ok_to_enable_ints) + kgdb_enable_ints_now(); /* try to enable after */ + + /* We do this a lot because, well we really want to get these + * interrupts. The serial driver will clear these bits when it + * initializes the chip. Every thing else it does is ok, + * but this. + */ + if (!ints_disabled) { + outb_px(gdb_async_info->port + UART_IER, + gdb_async_info->IER); + } + } + +} /* tty_putDebugChar */ + +/* + * This does nothing for the serial port, since it doesn't buffer. + */ + +void tty_flushDebugChar(void) +{ +} + +module_init(kgdb_enable_ints); --- linux-2.6.3-rc2/arch/x86_64/lib/Makefile 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/x86_64/lib/Makefile 2004-02-12 00:39:48.000000000 -0800 @@ -11,3 +11,4 @@ lib-y += memcpy.o memmove.o memset.o cop lib-$(CONFIG_IO_DEBUG) += iodebug.o lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +lib-$(CONFIG_KGDB) += kgdb_serial.o --- linux-2.6.3-rc2/arch/x86_64/Makefile 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/x86_64/Makefile 2004-02-12 00:40:32.000000000 -0800 @@ -52,7 +52,10 @@ CFLAGS += -Wno-sign-compare ifneq ($(CONFIG_DEBUG_INFO),y) CFLAGS += -fno-asynchronous-unwind-tables endif -#CFLAGS += $(call check_gcc,-funit-at-a-time,) + +# Enable unit-at-a-time mode when possible. It shrinks the +# kernel considerably. +CFLAGS += $(call check_gcc,-funit-at-a-time,) head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o --- linux-2.6.3-rc2/Documentation/binfmt_misc.txt 2003-10-08 15:07:08.000000000 -0700 +++ 25/Documentation/binfmt_misc.txt 2004-02-12 00:40:34.000000000 -0800 @@ -15,7 +15,7 @@ First you must mount binfmt_misc: mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc To actually register a new binary type, you have to set up a string looking like -:name:type:offset:magic:mask:interpreter: (where you can choose the ':' upon +:name:type:offset:magic:mask:interpreter:flags (where you can choose the ':' upon your needs) and echo it to /proc/sys/fs/binfmt_misc/register. Here is what the fields mean: - 'name' is an identifier string. A new /proc file will be created with this @@ -34,6 +34,21 @@ Here is what the fields mean: The mask is anded with the byte sequence of the file. - 'interpreter' is the program that should be invoked with the binary as first argument (specify the full path) + - 'flags' is an optional field that controls several aspects of the invocation + of the interpreter. It is a string of capital letters, each controls a certain + aspect. The following flags are supported - + 'P' - preserve-argv[0]. Legacy behavior of binfmt_misc is to overwrite the + original argv[0] with the full path to the binary. When this flag is + included, binfmt_misc will add an argument to the argument vector for + this purpose, thus preserving the original argv[0]. + 'O' - open-binary. Legacy behavior of binfmt_misc is to pass the full path + of the binary to the interpreter as an argument. When this flag is + included, binfmt_misc will open the file for reading and pass its + descriptor as an argument, instead of the full path, thus allowing + the interpreter to execute non-readable binaries. This feature should + be used with care - the interpreter has to be trusted not to emit + the contents of the non-readable binary. + There are some restrictions: - the whole register string may not exceed 255 characters @@ -83,9 +98,9 @@ If you want to pass special arguments to write a wrapper script for it. See Documentation/java.txt for an example. -Your interpreter should NOT look in the PATH for the filename; the -kernel passes it the full filename to use. Using the PATH can cause -unexpected behaviour and be a security hazard. +Your interpreter should NOT look in the PATH for the filename; the kernel +passes it the full filename (or the file descriptor) to use. Using $PATH can +cause unexpected behaviour and can be a security hazard. There is a web page about binfmt_misc at --- linux-2.6.3-rc2/Documentation/Changes 2004-02-09 21:35:50.000000000 -0800 +++ 25/Documentation/Changes 2004-02-12 00:40:35.000000000 -0800 @@ -216,13 +216,6 @@ chmod 0644 /dev/cpu/microcode as root before you can use this. You'll probably also want to get the user-space microcode_ctl utility to use with this. -If you have compiled the driver as a module you may need to add -the following line: - -alias char-major-10-184 microcode - -to your /etc/modules.conf file. - Powertweak ---------- @@ -259,17 +252,6 @@ mknod /dev/ppp c 108 0 as root. -If you build ppp support as modules, you will need the following in -your /etc/modules.conf file: - -alias char-major-108 ppp_generic -alias /dev/ppp ppp_generic -alias tty-ldisc-3 ppp_async -alias tty-ldisc-14 ppp_synctty -alias ppp-compress-21 bsd_comp -alias ppp-compress-24 ppp_deflate -alias ppp-compress-26 ppp_deflate - If you use devfsd and build ppp support as modules, you will need the following in your /etc/devfsd.conf file: --- linux-2.6.3-rc2/Documentation/computone.txt 2003-08-08 22:55:10.000000000 -0700 +++ 25/Documentation/computone.txt 2004-02-12 00:40:35.000000000 -0800 @@ -41,11 +41,11 @@ Hardware - If you have an ISA card, find Note the hardware address from the Computone ISA cards installed into the system. These are required for editing ip2.c or editing - /etc/modules.conf, or for specification on the modprobe + /etc/modprobe.conf, or for specification on the modprobe command line. - Note that the /etc/modules.conf file is named /etc/conf.modules - with older versions of the module utilities. + Note that the /etc/modules.conf should be used for older (pre-2.6) + kernels. Software - @@ -58,7 +58,7 @@ b) Run "make config" or "make menuconfig c) Set address on ISA cards then: edit /usr/src/linux/drivers/char/ip2.c if needed or - edit /etc/modules.conf if needed (module). + edit /etc/modprobe.conf if needed (module). or both to match this setting. d) Run "make modules" e) Run "make modules_install" @@ -145,11 +145,11 @@ the irqs are not specified the driver us selects polled mode). If no base addresses are specified the defaults in ip2.c are used. If you are autoloading the driver module with kerneld or kmod the base addresses and interrupt number must also be set in ip2.c -and recompile or just insert and options line in /etc/modules.conf or both. +and recompile or just insert and options line in /etc/modprobe.conf or both. The options line is equivalent to the command line and takes precidence over what is in ip2.c. -/etc/modules.conf sample: +/etc/modprobe.conf sample: options ip2 io=1,0x328 irq=1,10 alias char-major-71 ip2 alias char-major-72 ip2 --- linux-2.6.3-rc2/Documentation/crypto/api-intro.txt 2003-10-08 15:07:08.000000000 -0700 +++ 25/Documentation/crypto/api-intro.txt 2004-02-12 00:40:35.000000000 -0800 @@ -68,7 +68,7 @@ Many real examples are available in the CONFIGURATION NOTES As Triple DES is part of the DES module, for those using modular builds, -add the following line to /etc/modules.conf: +add the following line to /etc/modprobe.conf: alias des3_ede des --- linux-2.6.3-rc2/Documentation/digiboard.txt 2003-06-14 12:18:09.000000000 -0700 +++ 25/Documentation/digiboard.txt 2004-02-12 00:40:35.000000000 -0800 @@ -24,7 +24,7 @@ The driver can be built direct into the The pcxx driver can be configured using the command line feature while loading the kernel with LILO or LOADLIN or, if built as a module, with arguments to insmod and modprobe or with parameters in -/etc/modules.conf for modprobe and kerneld. +/etc/modprobe.conf for modprobe and kerneld. After configuring the driver you need to create the device special files as described in "Device file creation:" below and set the appropriate @@ -91,13 +91,13 @@ devices following that board, you can em The remaining board still uses ttyD8-ttyD15 and cud8-cud15. -Example line for /etc/modules.conf for use with kerneld and as default +Example line for /etc/modprobe.conf for use with kerneld and as default parameters for modprobe: options pcxx io=0x200 numports=8 -For kerneld to work you will likely need to add these two lines to your -/etc/modules.conf: +For kmod to work you will likely need to add these two lines to your +/etc/modprobe.conf: alias char-major-22 pcxx alias char-major-23 pcxx --- linux-2.6.3-rc2/Documentation/fb/intel810.txt 2003-06-14 12:18:25.000000000 -0700 +++ 25/Documentation/fb/intel810.txt 2004-02-12 00:40:35.000000000 -0800 @@ -194,7 +194,7 @@ Using the same setup as described above, modprobe i810fb vram=2 xres=1024 bpp=8 hsync1=30 hsync2=55 vsync1=50 \ vsync2=85 accel=1 mtrr=1 -Or just add the following to /etc/modules.conf +Or just add the following to /etc/modprobe.conf options i810fb vram=2 xres=1024 bpp=16 hsync1=30 hsync2=55 vsync1=50 \ vsync2=85 accel=1 mtrr=1 --- linux-2.6.3-rc2/Documentation/filesystems/proc.txt 2003-11-09 16:45:05.000000000 -0800 +++ 25/Documentation/filesystems/proc.txt 2004-02-12 00:41:27.000000000 -0800 @@ -900,6 +900,15 @@ super-nr shows the number of currently a Every mounted file system needs a super block, so if you plan to mount lots of file systems, you may want to increase these numbers. +aio-nr and aio-max-nr +--------------------- + +aio-nr is the running total of the number of events specified on the +io_setup system call for all currently active aio contexts. If aio-nr +reaches aio-max-nr then io_setup will fail with EAGAIN. Note that +raising aio-max-nr does not result in the pre-allocation or re-sizing +of any kernel data structures. + 2.2 /proc/sys/fs/binfmt_misc - Miscellaneous binary formats ----------------------------------------------------------- --- linux-2.6.3-rc2/Documentation/ftape.txt 2003-10-08 15:07:08.000000000 -0700 +++ 25/Documentation/ftape.txt 2004-02-12 00:40:35.000000000 -0800 @@ -242,15 +242,15 @@ C. Boot and load time configuration Module parameters can be specified either directly when invoking the program 'insmod' at the shell prompt: - insmod ftape.o ft_tracing=4 + modprobe ftape ft_tracing=4 - or by editing the file `/etc/modules.conf' in which case they take + or by editing the file `/etc/modprobe.conf' in which case they take effect each time when the module is loaded with `modprobe' (please refer to the respective manual pages). Thus, you should add a line options ftape ft_tracing=4 - to `/etc/modules.conf` if you intend to increase the debugging + to `/etc/modprobe.conf` if you intend to increase the debugging output of the driver. @@ -298,7 +298,7 @@ C. Boot and load time configuration 5. Example module parameter setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To do the same, but with ftape compiled as a loadable kernel - module, add the following line to `/etc/modules.conf': + module, add the following line to `/etc/modprobe.conf': options ftape ft_probe_fc10=1 ft_tracing=4 --- linux-2.6.3-rc2/Documentation/hayes-esp.txt 2003-06-14 12:17:57.000000000 -0700 +++ 25/Documentation/hayes-esp.txt 2004-02-12 00:40:35.000000000 -0800 @@ -109,7 +109,7 @@ option with a space. For example: insmod esp dma=3 trigger=512 The esp module can be automatically loaded when needed. To cause this to -happen, add the following lines to /etc/modules.conf (replacing the last line +happen, add the following lines to /etc/modprobe.conf (replacing the last line with options for your configuration): alias char-major-57 esp --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/andthen 2004-02-12 00:39:45.000000000 -0800 @@ -0,0 +1,100 @@ + +define set_andthen + set var $thp=0 + set var $thp=(struct kgdb_and_then_struct *)&kgdb_data[0] + set var $at_size = (sizeof kgdb_data)/(sizeof *$thp) + set var $at_oc=kgdb_and_then_count + set var $at_cc=$at_oc +end + +define andthen_next + set var $at_cc=$arg0 +end + +define andthen + andthen_set_edge + if ($at_cc >= $at_oc) + printf "Outside window. Window size is %d\n",($at_oc-$at_low) + else + printf "%d: ",$at_cc + output *($thp+($at_cc++ % $at_size )) + printf "\n" + end +end +define andthen_set_edge + set var $at_oc=kgdb_and_then_count + set var $at_low = $at_oc - $at_size + if ($at_low < 0 ) + set var $at_low = 0 + end + if (( $at_cc > $at_oc) || ($at_cc < $at_low)) + printf "Count outside of window, setting count to " + if ($at_cc >= $at_oc) + set var $at_cc = $at_oc + else + set var $at_cc = $at_low + end + printf "%d\n",$at_cc + end +end + +define beforethat + andthen_set_edge + if ($at_cc <= $at_low) + printf "Outside window. Window size is %d\n",($at_oc-$at_low) + else + printf "%d: ",$at_cc-1 + output *($thp+(--$at_cc % $at_size )) + printf "\n" + end +end + +document andthen_next + andthen_next + . sets the number of the event to display next. If this event + . is not in the event pool, either andthen or beforethat will + . correct it to the nearest event pool edge. The event pool + . ends at the last event recorded and begins + . prior to that. If beforethat is used next, it will display + . event -1. +. + andthen commands are: set_andthen, andthen_next, andthen and beforethat +end + + +document andthen + andthen +. displays the next event in the list. sets up to display +. the oldest saved event first. +. (optional) count of the event to display. +. note the number of events saved is specified at configure time. +. if events are saved between calls to andthen the index will change +. but the displayed event will be the next one (unless the event buffer +. is overrun). +. +. andthen commands are: set_andthen, andthen_next, andthen and beforethat +end + +document set_andthen + set_andthen +. sets up to use the and commands. +. if you have defined your own struct, use the above and +. then enter the following: +. p $thp=(struct kgdb_and_then_structX *)&kgdb_data[0] +. where is the name of your structure. +. +. andthen commands are: set_andthen, andthen_next, andthen and beforethat +end + +document beforethat + beforethat +. displays the next prior event in the list. sets up to +. display the last occuring event first. +. +. note the number of events saved is specified at configure time. +. if events are saved between calls to beforethat the index will change +. but the displayed event will be the next one (unless the event buffer +. is overrun). +. +. andthen commands are: set_andthen, andthen_next, andthen and beforethat +end --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/debug-nmi.txt 2004-02-12 00:39:45.000000000 -0800 @@ -0,0 +1,37 @@ +Subject: Debugging with NMI +Date: Mon, 12 Jul 1999 11:28:31 -0500 +From: David Grothe +Organization: Gcom, Inc +To: David Grothe + +Kernel hackers: + +Maybe this is old hat, but it is new to me -- + +On an ISA bus machine, if you short out the A1 and B1 pins of an ISA +slot you will generate an NMI to the CPU. This interrupts even a +machine that is hung in a loop with interrupts disabled. Used in +conjunction with kgdb < +ftp://ftp.gcom.com/pub/linux/src/kgdb-2.3.35/kgdb-2.3.35.tgz > you can +gain debugger control of a machine that is hung in the kernel! Even +without kgdb the kernel will print a stack trace so you can find out +where it was hung. + +The A1/B1 pins are directly opposite one another and the farthest pins +towards the bracket end of the ISA bus socket. You can stick a paper +clip or multi-meter probe between them to short them out. + +I had a spare ISA bus to PC104 bus adapter around. The PC104 end of the +board consists of two rows of wire wrap pins. So I wired a push button +between the A1/B1 pins and now have an ISA board that I can stick into +any ISA bus slot for debugger entry. + +Microsoft has a circuit diagram of a PCI card at +http://www.microsoft.com/hwdev/DEBUGGING/DMPSW.HTM. If you want to +build one you will have to mail them and ask for the PAL equations. +Nobody makes one comercially. + +[THIS TIP COMES WITH NO WARRANTY WHATSOEVER. It works for me, but if +your machine catches fire, it is your problem, not mine.] + +-- Dave (the kgdb guy) --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdb-globals.txt 2004-02-12 00:39:45.000000000 -0800 @@ -0,0 +1,71 @@ +Sender: akale@veritas.com +Date: Fri, 23 Jun 2000 19:26:35 +0530 +From: "Amit S. Kale" +Organization: Veritas Software (India) +To: Dave Grothe , linux-kernel@vger.rutgers.edu +CC: David Milburn , + "Edouard G. Parmelan" , + ezannoni@cygnus.com, Keith Owens +Subject: Re: Module debugging using kgdb + +Dave Grothe wrote: +> +> Amit: +> +> There is a 2.4.0 version of kgdb on our ftp site: +> ftp://ftp.gcom.com/pub/linux/src/kgdb. I mirrored your version of gdb +> and loadmodule.sh there. +> +> Have a look at the README file and see if I go it right. If not, send +> me some corrections and I will update it. +> +> Does your version of gdb solve the global variable problem? + +Yes. +Thanks to Elena Zanoni, gdb (developement version) can now calculate +correctly addresses of dynamically loaded object files. I have not been +following gdb developement for sometime and am not sure when symbol +address calculation fix is going to appear in a gdb stable version. + +Elena, any idea when the fix will make it to a prebuilt gdb from a +redhat release? + +For the time being I have built a gdb developement version. It can be +used for module debugging with loadmodule.sh script. + +The problem with calculating of module addresses with previous versions +of gdb was as follows: +gdb did not use base address of a section while calculating address of +a symbol in the section in an object file loaded via 'add-symbol-file'. +It used address of .text segment instead. Due to this addresses of +symbols in .data, .bss etc. (e.g. global variables) were calculated incorrectly. + +Above mentioned fix allow gdb to use base address of a segment while +calculating address of a symbol in it. It adds a parameter '-s' to +'add-symbol-file' command for specifying base address of a segment. + +loadmodule.sh script works as follows. + +1. Copy a module file to target machine. +2. Load the module on the target machine using insmod with -m parameter. +insmod produces a module load map which contains base addresses of all +sections in the module and addresses of symbols in the module file. +3. Find all sections and their base addresses in the module from +the module map. +4. Generate a script that loads the module file. The script uses +'add-symbol-file' and specifies address of text segment followed by +addresses of all segments in the module. + +Here is an example gdb script produced by loadmodule.sh script. + +add-symbol-file foo 0xd082c060 -s .text.lock 0xd08cbfb5 +-s .fixup 0xd08cfbdf -s .rodata 0xd08cfde0 -s __ex_table 0xd08e3b38 +-s .data 0xd08e3d00 -s .bss 0xd08ec8c0 -s __ksymtab 0xd08ee838 + +With this command gdb can calculate addresses of symbols in ANY segment +in a module file. + +Regards. +-- +Amit Kale +Veritas Software ( http://www.veritas.com ) --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdbinit 2004-02-12 00:39:45.000000000 -0800 @@ -0,0 +1,14 @@ +shell echo -e "\003" >/dev/ttyS0 +set remotebaud 38400 +target remote /dev/ttyS0 +define si +stepi +printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx +printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp +x/i $eip +end +define ni +nexti +printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx +printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp +x/i $eip --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdbinit.hw 2004-02-12 00:39:45.000000000 -0800 @@ -0,0 +1,117 @@ + +#Using ia-32 hardware breakpoints. +# +#4 hardware breakpoints are available in ia-32 processors. These breakpoints +#do not need code modification. They are set using debug registers. +# +#Each hardware breakpoint can be of one of the +#three types: execution, write, access. +#1. An Execution breakpoint is triggered when code at the breakpoint address is +#executed. +#2. A write breakpoint ( aka watchpoints ) is triggered when memory location +#at the breakpoint address is written. +#3. An access breakpoint is triggered when memory location at the breakpoint +#address is either read or written. +# +#As hardware breakpoints are available in limited number, use software +#breakpoints ( br command in gdb ) instead of execution hardware breakpoints. +# +#Length of an access or a write breakpoint defines length of the datatype to +#be watched. Length is 1 for char, 2 short , 3 int. +# +#For placing execution, write and access breakpoints, use commands +#hwebrk, hwwbrk, hwabrk +#To remove a breakpoint use hwrmbrk command. +# +#These commands take following types of arguments. For arguments associated +#with each command, use help command. +#1. breakpointno: 0 to 3 +#2. length: 1 to 3 +#3. address: Memory location in hex ( without 0x ) e.g c015e9bc +# +#Use the command exinfo to find which hardware breakpoint occured. + +#hwebrk breakpointno address +define hwebrk + maintenance packet Y$arg0,0,0,$arg1 +end +document hwebrk + hwebrk
+ Places a hardware execution breakpoint + = 0 - 3 +
= Hex digits without leading "0x". +end + +#hwwbrk breakpointno length address +define hwwbrk + maintenance packet Y$arg0,1,$arg1,$arg2 +end +document hwwbrk + hwwbrk
+ Places a hardware write breakpoint + = 0 - 3 + = 1 (1 byte), 2 (2 byte), 3 (4 byte) +
= Hex digits without leading "0x". +end + +#hwabrk breakpointno length address +define hwabrk + maintenance packet Y$arg0,1,$arg1,$arg2 +end +document hwabrk + hwabrk
+ Places a hardware access breakpoint + = 0 - 3 + = 1 (1 byte), 2 (2 byte), 3 (4 byte) +
= Hex digits without leading "0x". +end + +#hwrmbrk breakpointno +define hwrmbrk + maintenance packet y$arg0 +end +document hwrmbrk + hwrmbrk + = 0 - 3 + Removes a hardware breakpoint +end + +define reboot + maintenance packet r +end +#exinfo +define exinfo + maintenance packet qE +end +document exinfo + exinfo + Gives information about a breakpoint. +end +define get_th + p $th=(struct thread_info *)((int)$esp & ~8191) +end +document get_th + get_tu + Gets and prints the current thread_info pointer, Defines th to be it. +end +define get_cu + p $cu=((struct thread_info *)((int)$esp & ~8191))->task +end +document get_cu + get_cu + Gets and print the "current" value. Defines $cu to be it. +end +define int_off + set var $flags=$eflags + set $eflags=$eflags&~0x200 + end +define int_on + set var $eflags|=$flags&0x200 + end +document int_off + saves the current interrupt state and clears the processor interrupt + flag. Use int_on to restore the saved flag. +end +document int_on + Restores the interrupt flag saved by int_off. +end --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdbinit-modules 2004-02-12 00:39:45.000000000 -0800 @@ -0,0 +1,146 @@ +# +# Usefull GDB user-command to debug Linux Kernel Modules with gdbstub. +# +# This don't work for Linux-2.0 or older. +# +# Author Edouard G. Parmelan +# +# +# Fri Apr 30 20:33:29 CEST 1999 +# First public release. +# +# Major cleanup after experiment Linux-2.0 kernel without success. +# Symbols of a module are not in the correct order, I can't explain +# why :( +# +# Fri Mar 19 15:41:40 CET 1999 +# Initial version. +# +# Thu Jan 6 16:29:03 CST 2000 +# A little fixing by Dave Grothe +# +# Mon Jun 19 09:33:13 CDT 2000 +# Alignment changes from Edouard Parmelan +# +# The basic idea is to find where insmod load the module and inform +# GDB to load the symbol table of the module with the GDB command +# ``add-symbol-file
''. +# +# The Linux kernel holds the list of all loaded modules in module_list, +# this list end with &kernel_module (exactly with module->next == NULL, +# but the last module is not a real module). +# +# Insmod allocates the struct module before the object file. Since +# Linux-2.1, this structure contain his size. The real address of +# the object file is then (char*)module + module->size_of_struct. +# +# You can use three user functions ``mod-list'', ``mod-print-symbols'' +# and ``add-module-symbols''. +# +# mod-list list all loaded modules with the format: +# +# +# As soon as you have found the address of your module, you can +# print its exported symbols (mod-print-symbols) or inform GDB to add +# symbols from your module file (mod-add-symbols). +# +# The argument that you give to mod-print-symbols or mod-add-symbols +# is the from the mod-list command. +# +# When using the mod-add-symbols command you must also give the full +# pathname of the modules object code file. +# +# The command mod-add-lis is an example of how to make this easier. +# You can edit this macro to contain the path name of your own +# favorite module and then use it as a shorthand to load it. You +# still need the module-address, however. +# +# The internal function ``mod-validate'' set the GDB variable $mod +# as a ``struct module*'' if the kernel known the module otherwise +# $mod is set to NULL. This ensure to not add symbols for a wrong +# address. +# +# Have a nice hacking day ! +# +# +define mod-list + set $mod = (struct module*)module_list + # the last module is the kernel, ignore it + while $mod != &kernel_module + printf "%p\t%s\n", (long)$mod, ($mod)->name + set $mod = $mod->next + end +end +document mod-list +List all modules in the form: +Use the as the argument for the other +mod-commands: mod-print-symbols, mod-add-symbols. +end + +define mod-validate + set $mod = (struct module*)module_list + while ($mod != $arg0) && ($mod != &kernel_module) + set $mod = $mod->next + end + if $mod == &kernel_module + set $mod = 0 + printf "%p is not a module\n", $arg0 + end +end +document mod-validate +mod-validate +Internal user-command used to validate the module parameter. +If is a real loaded module, set $mod to it otherwise set $mod to 0. +end + + +define mod-print-symbols + mod-validate $arg0 + if $mod != 0 + set $i = 0 + while $i < $mod->nsyms + set $sym = $mod->syms[$i] + printf "%p\t%s\n", $sym->value, $sym->name + set $i = $i + 1 + end + end +end +document mod-print-symbols +mod-print-symbols +Print all exported symbols of the module. see mod-list +end + + +define mod-add-symbols-align + mod-validate $arg0 + if $mod != 0 + set $mod_base = ($mod->size_of_struct + (long)$mod) + if ($arg2 != 0) && (($mod_base & ($arg2 - 1)) != 0) + set $mod_base = ($mod_base | ($arg2 - 1)) + 1 + end + add-symbol-file $arg1 $mod_base + end +end +document mod-add-symbols-align +mod-add-symbols-align +Load the symbols table of the module from the object file where +first section aligment is . +To retreive alignment, use `objdump -h '. +end + +define mod-add-symbols + mod-add-symbols-align $arg0 $arg1 sizeof(long) +end +document mod-add-symbols +mod-add-symbols +Load the symbols table of the module from the object file. +Default alignment is 4. See mod-add-symbols-align. +end + +define mod-add-lis + mod-add-symbols-align $arg0 /usr/src/LiS/streams.o 16 +end +document mod-add-lis +mod-add-lis +Does mod-add-symbols /usr/src/LiS/streams.o +end --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/kgdbeth.txt 2004-02-12 00:39:46.000000000 -0800 @@ -0,0 +1,92 @@ +KGDB over ethernet +================== + +Authors +------- + +Robert Walsh (2.6 port) +wangdi (2.6 port) +Matt Mackall (netpoll api) +San Mehat (original 2.4 code) + + +Introduction +------------ + +KGDB supports debugging over ethernet (kgdboe) via polling of a given +network interface. Most cards should be supported automatically. +Debugging facilities are available as soon as the network driver and +kgdboe have initialized. Unfortunately, this is too late in the boot +process for debugging some issues, but works quite well for many +others. This should not interfere with normal network usage and +doesn't require a dedicated NIC. + +Terminology +----------- + +This document uses the following terms: + + TARGET: the machine being debugged. + HOST: the machine running gdb. + + +Usage +----- + +You need to use the following command-line option on the TARGET kernel: + + kgdboe=[tgt-port]@/[dev],[host-port]@/[host-macaddr] + + where + tgt-port source for UDP packets (defaults to 6443) + tgt-ip source IP to use (interface address) + dev network interface (eth0) + host-port HOST UDP port (6442) (not really used) + host-ip IP address for HOST machine + host-macaddr ethernet MAC address for HOST (ff:ff:ff:ff:ff:ff) + + examples: + + kgdboe=7000@192.168.0.1/eth1,7001@192.168.0.2/00:05:3C:04:47:5D + this machine is 192.168.0.1 on eth1 + remote machine is 192.168.0.2 with MAC address 00:05:3C:04:47:5D + listen for gdb packets on port 7000 + send unsolicited gdb packets to port 7001 + + kgdboe=@192.168.0.1/,@192.168.0.2/ + this machine is 192.168.0.1 on default interface eth0 + remote machine is 192.168.0.2, use default broadcast MAC address + listen for gdb packets on default port 6443 + send unsolicited gdb packets to port 6442 + +Only packets originating from the configured HOST IP address will be +accepted by the debugger. + +On the HOST side, run gdb as normal and use a remote UDP host as the +target: + + % gdb ./vmlinux + GNU gdb Red Hat Linux (5.3post-0.20021129.18rh) + Copyright 2003 Free Software Foundation, Inc. + GDB is free software, covered by the GNU General Public License, and you are + welcome to change it and/or distribute copies of it under certain conditions. + Type "show copying" to see the conditions. + There is absolutely no warranty for GDB. Type "show warranty" for details. + This GDB was configured as "i386-redhat-linux-gnu"... + (gdb) target remote udp:HOSTNAME:6443 + +You can now continue as if you were debugging over a serial line. + +Limitations +----------- + +The current release of this code is exclusive of using kgdb on a +serial interface, so you must boot without the kgdboe option to use +serial debugging. Trying to debug the network driver while using it +will prove interesting. + +Bug reports +----------- + +Send bug reports to Robert Walsh and Matt +Mackall . --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/kgdb.txt 2004-02-12 00:39:45.000000000 -0800 @@ -0,0 +1,775 @@ +Last edit: <20030806.1637.12> +This file has information specific to the i386 kgdb option. Other +platforms with the kgdb option may behave in a similar fashion. + +New features: +============ +20030806.1557.37 +This version was made against the 2.6.0-test2 kernel. We have made the +following changes: + +- The getthread() code in the stub calls find_task_by_pid(). It fails + if we are early in the bring up such that the pid arrays have yet to + be allocated. We have added a line to kernel/pid.c to make + "kgdb_pid_init_done" true once the arrays are allocated. This way the + getthread() code knows not to call. This is only used by the thread + debugging stuff and threads will not yet exist at this point in the + boot. + +- For some reason, gdb was not asking for a new thread list when the + "info thread" command was given. We changed to the newer version of + the thread info command and gdb now seems to ask when needed. Result, + we now get all threads in the thread list. + +- We now respond to the ThreadExtraInfo request from gdb with the thread + name from task_struct .comm. This then appears in the thread list. + Thoughts on additional options for this are welcome. Things such as + "has BKL" and "Preempted" come to mind. I think we could have a flag + word that could enable different bits of info here. + +- We now honor, sort of, the C and S commands. These are continue and + single set after delivering a signal. We ignore the signal and do the + requested action. This only happens when we told gdb that a signal + was the reason for entry, which is only done on memory faults. The + result is that you can now continue into the Oops. + +- We changed the -g to -gdwarf-2. This seems to be the same as -ggdb, + but it is more exact on what language to use. + +- We added two dwarf2 include files and a bit of code at the end of + entry.S. This does not yet work, so it is disabled. Still we want to + keep track of the code and "maybe" someone out there can fix it. + +- Randy Dunlap sent some fix ups for this file which are now merged. + +- Hugh Dickins sent a fix to a bit of code in traps.c that prevents a + compiler warning if CONFIG_KGDB is off (now who would do that :). + +- Andrew Morton sent a fix for the serial driver which is now merged. + +- Andrew also sent a change to the stub around the cpu managment code + which is also merged. + +- Andrew also sent a patch to make "f" as well as "g" work as SysRq + commands to enter kgdb, merged. + +- If CONFIG_KGDB and CONFIG_DEBUG_SPINLOCKS are both set we added a + "who" field to the spinlock data struct. This is filled with + "current" when ever the spinlock suceeds. Useful if you want to know + who has the lock. + +_ And last, but not least, we fixed the "get_cu" macro to properly get + the current value of "current". + +New features: +============ +20030505.1827.27 +We are starting to align with the sourceforge version, at least in +commands. To this end, the boot command string to start kgdb at +boot time has been changed from "kgdb" to "gdb". + +Andrew Morton sent a couple of patches which are now included as follows: +1.) We now return a flag to the interrupt handler. +2.) We no longer use smp_num_cpus (a conflict with the lock meter). +3.) And from William Lee Irwin III code to make + sure high-mem is set up before we attempt to register our interrupt + handler. +We now include asm/kgdb.h from config.h so you will most likely never +have to include it. It also 'NULLS' the kgdb macros you might have in +your code when CONFIG_KGDB is not defined. This allows you to just +turn off CONFIG_KGDB to turn off all the kgdb_ts() calls and such. +This include is conditioned on the machine being an x86 so as to not +mess with other archs. + +20020801.1129.03 +This is currently the version for the 2.4.18 (and beyond?) kernel. + +We have several new "features" beginning with this version: + +1.) Kgdb now syncs the "other" CPUs with a cross-CPU NMI. No more + waiting and it will pull that guy out of an IRQ off spin lock :) + +2.) We doctored up the code that tells where a task is waiting and + included it so that the "info thread" command will show a bit more + than "schedule()". Try it... + +3.) Added the ability to call a function from gdb. All the standard gdb + issues apply, i.e. if you hit a breakpoint in the function, you are + not allowed to call another (gdb limitation, not kgdb). To help + this capability we added a memory allocation function. Gdb does not + return this memory (it is used for strings that you pass to that function + you are calling from gdb) so we fixed up a way to allow you to + manually return the memory (see below). + +4.) Kgdb time stamps (kgdb_ts()) are enhanced to expand what was the + interrupt flag to now also include the preemption count and the + "in_interrupt" info. The flag is now called "with_pif" to indicate + the order, preempt_count, in_interrupt, flag. The preempt_count is + shifted left by 4 bits so you can read the count in hex by dropping + the low order digit. In_interrupt is in bit 1, and the flag is in + bit 0. + +5.) The command: "p kgdb_info" is now expanded and prints something + like: +(gdb) p kgdb_info +$2 = {used_malloc = 0, called_from = 0xc0107506, entry_tsc = 67468627259, + errcode = 0, vector = 3, print_debug_info = 0, hold_on_sstep = 1, + cpus_waiting = {{task = 0xc027a000, pid = 32768, hold = 0, + regs = 0xc027bf84}, {task = 0x0, pid = 0, hold = 0, regs = 0x0}}} + + Things to note here: a.) used_malloc is the amount of memory that + has been malloc'ed to do calls from gdb. You can reclaim this + memory like this: "p kgdb_info.used_malloc=0" Cool, huh? b.) + cpus_waiting is now "sized" by the number of CPUs you enter at + configure time in the kgdb configure section. This is NOT used + anywhere else in the system, but it is "nice" here. c.) The task's + "pid" is now in the structure. This is the pid you will need to use + to decode to the thread id to get gdb to look at that thread. + Remember that the "info thread" command prints a list of threads + wherein it numbers each thread with its reference number followed + by the thread's pid. Note that the per-CPU idle threads actually + have pids of 0 (yes, there is more than one pid 0 in an SMP system). + To avoid confusion, kgdb numbers these threads with numbers beyond + the MAX_PID. That is why you see 32768 and above. + +6.) A subtle change, we now provide the complete register set for tasks + that are active on the other CPUs. This allows better trace back on + those tasks. + + And, let's mention what we could not fix. Back-trace from all but the + thread that we trapped will, most likely, have a bogus entry in it. + The problem is that gdb does not recognize the entry code for + functions that use "current" near (at all?) the entry. The compiler + is putting the "current" decode as the first two instructions of the + function where gdb expects to find %ebp changing code. Back trace + also has trouble with interrupt frames. I am talking with Daniel + Jacobowitz about some way to fix this, but don't hold your breath. + +20011220.0050.35 +Major enhancement with this version is the ability to hold one or more +CPUs in an SMP system while allowing the others to continue. Also, by +default only the current CPU is enabled on single-step commands (please +note that gdb issues single-step commands at times other than when you +use the si command). + +Another change is to collect some useful information in +a global structure called "kgdb_info". You should be able to just: + +p kgdb_info + +although I have seen cases where the first time this is done gdb just +prints the first member but prints the whole structure if you then enter +CR (carriage return or enter). This also works: + +p *&kgdb_info + +Here is a sample: +(gdb) p kgdb_info +$4 = {called_from = 0xc010732c, entry_tsc = 32804123790856, errcode = 0, + vector = 3, print_debug_info = 0} + +"Called_from" is the return address from the current entry into kgdb. +Sometimes it is useful to know why you are in kgdb, for example, was +it an NMI or a real breakpoint? The simple way to interrogate this +return address is: + +l *0xc010732c + +which will print the surrounding few lines of source code. + +"Entry_tsc" is the CPU TSC on entry to kgdb (useful to compare to the +kgdb_ts entries). + +"errcode" and "vector" are other entry parameters which may be helpful on +some traps. + +"print_debug_info" is the internal debugging kgdb print enable flag. Yes, +you can modify it. + +In SMP systems kgdb_info also includes the "cpus_waiting" structure and +"hold_on_step": + +(gdb) p kgdb_info +$7 = {called_from = 0xc0112739, entry_tsc = 1034936624074, errcode = 0, + vector = 2, print_debug_info = 0, hold_on_sstep = 1, cpus_waiting = {{ + task = 0x0, hold = 0, regs = 0x0}, {task = 0xc71b8000, hold = 0, + regs = 0xc71b9f70}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}}} + +"Cpus_waiting" has an entry for each CPU other than the current one that +has been stopped. Each entry contains the task_struct address for that +CPU, the address of the regs for that task and a hold flag. All these +have the proper typing so that, for example: + +p *kgdb_info.cpus_waiting[1].regs + +will print the registers for CPU 1. + +"Hold_on_sstep" is a new feature with this version and comes up set or +true. What this means is that whenever kgdb is asked to single-step all +other CPUs are held (i.e. not allowed to execute). The flag applies to +all but the current CPU and, again, can be changed: + +p kgdb_info.hold_on_sstep=0 + +restores the old behavior of letting all CPUs run during single-stepping. + +Likewise, each CPU has a "hold" flag, which if set, locks that CPU out +of execution. Note that this has some risk in cases where the CPUs need +to communicate with each other. If kgdb finds no CPU available on exit, +it will push a message thru gdb and stay in kgdb. Note that it is legal +to hold the current CPU as long as at least one CPU can execute. + +20010621.1117.09 +This version implements an event queue. Events are signaled by calling +a function in the kgdb stub and may be examined from gdb. See EVENTS +below for details. This version also tightens up the interrupt and SMP +handling to not allow interrupts on the way to kgdb from a breakpoint +trap. It is fine to allow these interrupts for user code, but not +system debugging. + +Version +======= + +This version of the kgdb package was developed and tested on +kernel version 2.4.16. It will not install on any earlier kernels. +It is possible that it will continue to work on later versions +of 2.4 and then versions of 2.5 (I hope). + + +Debugging Setup +=============== + +Designate one machine as the "development" machine. This is the +machine on which you run your compiles and which has your source +code for the kernel. Designate a second machine as the "target" +machine. This is the machine that will run your experimental +kernel. + +The two machines will be connected together via a serial line out +one or the other of the COM ports of the PC. You will need the +appropriate modem eliminator (null modem) cable(s) for this. + +Decide on which tty port you want the machines to communicate, then +connect them up back-to-back using the null modem cable. COM1 is +/dev/ttyS0 and COM2 is /dev/ttyS1. You should test this connection +with the two machines prior to trying to debug a kernel. Once you +have it working, on the TARGET machine, enter: + +setserial /dev/ttyS0 (or what ever tty you are using) + +and record the port address and the IRQ number. + +On the DEVELOPMENT machine you need to apply the patch for the kgdb +hooks. You have probably already done that if you are reading this +file. + +On your DEVELOPMENT machine, go to your kernel source directory and do +"make Xconfig" where X is one of "x", "menu", or "". If you are +configuring in the standard serial driver, it must not be a module. +Either yes or no is ok, but making the serial driver a module means it +will initialize after kgdb has set up the UART interrupt code and may +cause a failure of the control-C option discussed below. The configure +question for the serial driver is under the "Character devices" heading +and is: + +"Standard/generic (8250/16550 and compatible UARTs) serial support" + +Go down to the kernel debugging menu item and open it up. Enable the +kernel kgdb stub code by selecting that item. You can also choose to +turn on the "-ggdb -O1" compile options. The -ggdb causes the compiler +to put more debug info (like local symbols) in the object file. On the +i386 -g and -ggdb are the same so this option just reduces to "O1". The +-O1 reduces the optimization level. This may be helpful in some cases, +be aware, however, that this may also mask the problem you are looking +for. + +The baud rate. Default is 115200. What ever you choose be sure that +the host machine is set to the same speed. I recommend the default. + +The port. This is the I/O address of the serial UART that you should +have gotten using setserial as described above. The standard COM1 port +(3f8) using IRQ 4 is default. COM2 is 2f8 which by convention uses IRQ +3. + +The port IRQ (see above). + +Stack overflow test. This option makes a minor change in the trap, +system call and interrupt code to detect stack overflow and transfer +control to kgdb if it happens. (Some platforms have this in the +baseline code, but the i386 does not.) + +You can also configure the system to recognize the boot option +"console=kgdb" which if given will cause all console output during +booting to be put thru gdb as well as other consoles. This option +requires that gdb and kgdb be connected prior to sending console output +so, if they are not, a breakpoint is executed to force the connection. +This will happen before any kernel output (it is going thru gdb, right), +and will stall the boot until the connection is made. + +You can also configure in a patch to SysRq to enable the kGdb SysRq. +This request generates a breakpoint. Since the serial port IRQ line is +set up after any serial drivers, it is possible that this command will +work when the control-C will not. + +Save and exit the Xconfig program. Then do "make clean" , "make dep" +and "make bzImage" (or whatever target you want to make). This gets the +kernel compiled with the "-g" option set -- necessary for debugging. + +You have just built the kernel on your DEVELOPMENT machine that you +intend to run on your TARGET machine. + +To install this new kernel, use the following installation procedure. +Remember, you are on the DEVELOPMENT machine patching the kernel source +for the kernel that you intend to run on the TARGET machine. + +Copy this kernel to your target machine using your usual procedures. I +usually arrange to copy development: +/usr/src/linux/arch/i386/boot/bzImage to /vmlinuz on the TARGET machine +via a LAN based NFS access. That is, I run the cp command on the target +and copy from the development machine via the LAN. Run Lilo (see "man +lilo" for details on how to set this up) on the new kernel on the target +machine so that it will boot! Then boot the kernel on the target +machine. + +On the DEVELOPMENT machine, create a file called .gdbinit in the +directory /usr/src/linux. An example .gdbinit file looks like this: + +shell echo -e "\003" >/dev/ttyS0 +set remotebaud 38400 (or what ever speed you have chosen) +target remote /dev/ttyS0 + + +Change the "echo" and "target" definition so that it specifies the tty +port that you intend to use. Change the "remotebaud" definition to +match the data rate that you are going to use for the com line. + +You are now ready to try it out. + +Boot your target machine with "kgdb" in the boot command i.e. something +like: + +lilo> test kgdb + +or if you also want console output thru gdb: + +lilo> test kgdb console=kgdb + +You should see the lilo message saying it has loaded the kernel and then +all output stops. The kgdb stub is trying to connect with gdb. Start +gdb something like this: + + +On your DEVELOPMENT machine, cd /usr/src/linux and enter "gdb vmlinux". +When gdb gets the symbols loaded it will read your .gdbinit file and, if +everything is working correctly, you should see gdb print out a few +lines indicating that a breakpoint has been taken. It will actually +show a line of code in the target kernel inside the kgdb activation +code. + +The gdb interaction should look something like this: + + linux-dev:/usr/src/linux# gdb vmlinux + GDB is free software and you are welcome to distribute copies of it + under certain conditions; type "show copying" to see the conditions. + There is absolutely no warranty for GDB; type "show warranty" for details. + GDB 4.15.1 (i486-slackware-linux), + Copyright 1995 Free Software Foundation, Inc... + breakpoint () at i386-stub.c:750 + 750 } + (gdb) + +You can now use whatever gdb commands you like to set breakpoints. +Enter "continue" to start your target machine executing again. At this +point the target system will run at full speed until it encounters +your breakpoint or gets a segment violation in the kernel, or whatever. + +If you have the kgdb console enabled when you continue, gdb will print +out all the console messages. + +The above example caused a breakpoint relatively early in the boot +process. For the i386 kgdb it is possible to code a break instruction +as the first C-language point in init/main.c, i.e. as the first instruction +in start_kernel(). This could be done as follows: + +#include + breakpoint(); + +This breakpoint() is really a function that sets up the breakpoint and +single-step hardware trap cells and then executes a breakpoint. Any +early hard coded breakpoint will need to use this function. Once the +trap cells are set up they need not be set again, but doing it again +does not hurt anything, so you don't need to be concerned about which +breakpoint is hit first. Once the trap cells are set up (and the kernel +sets them up in due course even if breakpoint() is never called) the +macro: + +BREAKPOINT; + +will generate an inline breakpoint. This may be more useful as it stops +the processor at the instruction instead of in a function a step removed +from the location of interest. In either case must be +included to define both breakpoint() and BREAKPOINT. + +Triggering kgdbstub at other times +================================== + +Often you don't need to enter the debugger until much later in the boot +or even after the machine has been running for some time. Once the +kernel is booted and interrupts are on, you can force the system to +enter the debugger by sending a control-C to the debug port. This is +what the first line of the recommended .gdbinit file does. This allows +you to start gdb any time after the system is up as well as when the +system is already at a breakpoint. (In the case where the system is +already at a breakpoint the control-C is not needed, however, it will +be ignored by the target so no harm is done. Also note the the echo +command assumes that the port speed is already set. This will be true +once gdb has connected, but it is best to set the port speed before you +run gdb.) + +Another simple way to do this is to put the following file in you ~/bin +directory: + +#!/bin/bash +echo -e "\003" > /dev/ttyS0 + +Here, the ttyS0 should be replaced with what ever port you are using. +The "\003" is control-C. Once you are connected with gdb, you can enter +control-C at the command prompt. + +An alternative way to get control to the debugger is to enable the kGdb +SysRq command. Then you would enter Alt-SysRq-g (all three keys at the +same time, but push them down in the order given). To refresh your +memory of the available SysRq commands try Alt-SysRq-=. Actually any +undefined command could replace the "=", but I like to KNOW that what I +am pushing will never be defined. + +Debugging hints +=============== + +You can break into the target machine at any time from the development +machine by typing ^C (see above paragraph). If the target machine has +interrupts enabled this will stop it in the kernel and enter the +debugger. + +There is unfortunately no way of breaking into the kernel if it is +in a loop with interrupts disabled, so if this happens to you then +you need to place exploratory breakpoints or printk's into the kernel +to find out where it is looping. The exploratory breakpoints can be +entered either thru gdb or hard coded into the source. This is very +handy if you do something like: + +if () BREAKPOINT; + + +There is a copy of an e-mail in the Documentation/i386/kgdb/ directory +(debug-nmi.txt) which describes how to create an NMI on an ISA bus +machine using a paper clip. I have a sophisticated version of this made +by wiring a push button switch into a PC104/ISA bus adapter card. The +adapter card nicely furnishes wire wrap pins for all the ISA bus +signals. + +When you are done debugging the kernel on the target machine it is a +good idea to leave it in a running state. This makes reboots faster, +bypassing the fsck. So do a gdb "continue" as the last gdb command if +this is possible. To terminate gdb itself on the development machine +and leave the target machine running, first clear all breakpoints and +continue, then type ^Z to suspend gdb and then kill it with "kill %1" or +something similar. + +If gdbstub Does Not Work +======================== + +If it doesn't work, you will have to troubleshoot it. Do the easy +things first like double checking your cabling and data rates. You +might try some non-kernel based programs to see if the back-to-back +connection works properly. Just something simple like cat /etc/hosts +>/dev/ttyS0 on one machine and cat /dev/ttyS0 on the other will tell you +if you can send data from one machine to the other. Make sure it works +in both directions. There is no point in tearing out your hair in the +kernel if the line doesn't work. + +All of the real action takes place in the file +/usr/src/linux/arch/i386/kernel/kgdb_stub.c. That is the code on the target +machine that interacts with gdb on the development machine. In gdb you can +turn on a debug switch with the following command: + + set remotedebug + +This will print out the protocol messages that gdb is exchanging with +the target machine. + +Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c. This is +the code that talks to the serial port on the target side. There might +be a problem there. In particular there is a section of this code that +tests the UART which will tell you what UART you have if you define +"PRNT" (just remove "_off" from the #define PRNT_off). To view this +report you will need to boot the system without any beakpoints. This +allows the kernel to run to the point where it calls kgdb to set up +interrupts. At this time kgdb will test the UART and print out the type +it finds. (You need to wait so that the printks are actually being +printed. Early in the boot they are cached, waiting for the console to +be enabled. Also, if kgdb is entered thru a breakpoint it is possible +to cause a dead lock by calling printk when the console is locked. The +stub thus avoids doing printks from breakpoints, especially in the +serial code.) At this time, if the UART fails to do the expected thing, +kgdb will print out (using printk) information on what failed. (These +messages will be buried in all the other boot up messages. Look for +lines that start with "gdb_hook_interrupt:". You may want to use dmesg +once the system is up to view the log. If this fails or if you still +don't connect, review your answers for the port address. Use: + +setserial /dev/ttyS0 + +to get the current port and IRQ information. This command will also +tell you what the system found for the UART type. The stub recognizes +the following UART types: + +16450, 16550, and 16550A + +If you are really desperate you can use printk debugging in the +kgdbstub code in the target kernel until you get it working. In particular, +there is a global variable in /usr/src/linux/arch/i386/kernel/kgdb_stub.c +named "remote_debug". Compile your kernel with this set to 1, rather +than 0 and the debug stub will print out lots of stuff as it does +what it does. Likewise there are debug printks in the kgdb_serial.c +code that can be turned on with simple changes in the macro defines. + + +Debugging Loadable Modules +========================== + +This technique comes courtesy of Edouard Parmelan + + +When you run gdb, enter the command + +source gdbinit-modules + +This will read in a file of gdb macros that was installed in your +kernel source directory when kgdb was installed. This file implements +the following commands: + +mod-list + Lists the loaded modules in the form + +mod-print-symbols + Prints all the symbols in the indicated module. + +mod-add-symbols + Loads the symbols from the object file and associates them + with the indicated module. + +After you have loaded the module that you want to debug, use the command +mod-list to find the of your module. Then use that +address in the mod-add-symbols command to load your module's symbols. +From that point onward you can debug your module as if it were a part +of the kernel. + +The file gdbinit-modules also contains a command named mod-add-lis as +an example of how to construct a command of your own to load your +favorite module. The idea is to "can" the pathname of the module +in the command so you don't have to type so much. + +Threads +======= + +Each process in a target machine is seen as a gdb thread. gdb thread +related commands (info threads, thread n) can be used. + +ia-32 hardware breakpoints +========================== + +kgdb stub contains support for hardware breakpoints using debugging features +of ia-32(x86) processors. These breakpoints do not need code modification. +They use debugging registers. 4 hardware breakpoints are available in ia-32 +processors. + +Each hardware breakpoint can be of one of the following three types. + +1. Execution breakpoint - An Execution breakpoint is triggered when code + at the breakpoint address is executed. + + As limited number of hardware breakpoints are available, it is + advisable to use software breakpoints ( break command ) instead + of execution hardware breakpoints, unless modification of code + is to be avoided. + +2. Write breakpoint - A write breakpoint is triggered when memory + location at the breakpoint address is written. + + A write or can be placed for data of variable length. Length of + a write breakpoint indicates length of the datatype to be + watched. Length is 1 for 1 byte data , 2 for 2 byte data, 3 for + 4 byte data. + +3. Access breakpoint - An access breakpoint is triggered when memory + location at the breakpoint address is either read or written. + + Access breakpoints also have lengths similar to write breakpoints. + +IO breakpoints in ia-32 are not supported. + +Since gdb stub at present does not use the protocol used by gdb for hardware +breakpoints, hardware breakpoints are accessed through gdb macros. gdb macros +for hardware breakpoints are described below. + +hwebrk - Places an execution breakpoint + hwebrk breakpointno address +hwwbrk - Places a write breakpoint + hwwbrk breakpointno length address +hwabrk - Places an access breakpoint + hwabrk breakpointno length address +hwrmbrk - Removes a breakpoint + hwrmbrk breakpointno +exinfo - Tells whether a software or hardware breakpoint has occurred. + Prints number of the hardware breakpoint if a hardware breakpoint has + occurred. + +Arguments required by these commands are as follows +breakpointno - 0 to 3 +length - 1 to 3 +address - Memory location in hex digits ( without 0x ) e.g c015e9bc + +SMP support +========== + +When a breakpoint occurs or user issues a break ( Ctrl + C ) to gdb +client, all the processors are forced to enter the debugger. Current +thread corresponds to the thread running on the processor where +breakpoint occurred. Threads running on other processor(s) appear +similar to other non-running threads in the 'info threads' output. +Within the kgdb stub there is a structure "waiting_cpus" in which kgdb +records the values of "current" and "regs" for each CPU other than the +one that hit the breakpoint. "current" is a pointer to the task +structure for the task that CPU is running, while "regs" points to the +saved registers for the task. This structure can be examined with the +gdb "p" command. + +ia-32 hardware debugging registers on all processors are set to same +values. Hence any hardware breakpoints may occur on any processor. + +gdb troubleshooting +=================== + +1. gdb hangs +Kill it. restart gdb. Connect to target machine. + +2. gdb cannot connect to target machine (after killing a gdb and +restarting another) If the target machine was not inside debugger when +you killed gdb, gdb cannot connect because the target machine won't +respond. In this case echo "Ctrl+C"(ASCII 3) to the serial line. +e.g. echo -e "\003" > /dev/ttyS1 +This forces that target machine into the debugger, after which you +can connect. + +3. gdb cannot connect even after echoing Ctrl+C into serial line +Try changing serial line settings min to 1 and time to 0 +e.g. stty min 1 time 0 < /dev/ttyS1 +Try echoing again + +Check serial line speed and set it to correct value if required +e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 + +EVENTS +====== + +Ever want to know the order of things happening? Which CPU did what and +when? How did the spinlock get the way it is? Then events are for +you. Events are defined by calls to an event collection interface and +saved for later examination. In this case, kgdb events are saved by a +very fast bit of code in kgdb which is fully SMP and interrupt protected +and they are examined by using gdb to display them. Kgdb keeps only +the last N events, where N must be a power of two and is defined at +configure time. + + +Events are signaled to kgdb by calling: + +kgdb_ts(data0,data1) + +For each call kgdb records each call in an array along with other info. +Here is the array definition: + +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + long long at_time; + int from_ln; + char * in_src; + void *from; + int with_if; + int data0; + int data1; +}; + +For SMP machines the CPU is recorded, for all machines the TSC is +recorded (gets a time stamp) as well as the line number and source file +the call was made from. The address of the (from), the "if" (interrupt +flag) and the two data items are also recorded. The macro kgdb_ts casts +the types to int, so you can put any 32-bit values here. There is a +configure option to select the number of events you want to keep. A +nice number might be 128, but you can keep up to 1024 if you want. The +number must be a power of two. An "andthen" macro library is provided +for gdb to help you look at these events. It is also possible to define +a different structure for the event storage and cast the data to this +structure. For example the following structure is defined in kgdb: + +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + long long at_time; + int from_ln; + char * in_src; + void *from; + int with_if; + struct task_struct *t1; + struct task_struct *t2; +}; + +If you use this for display, the data elements will be displayed as +pointers to task_struct entries. You may want to define your own +structure to use in casting. You should only change the last two items +and you must keep the structure size the same. Kgdb will handle these +as 32-bit ints, but within that constraint you can define a structure to +cast to any 32-bit quantity. This need only be available to gdb and is +only used for casting in the display code. + +Final Items +=========== + +I picked up this code from Amit S. Kale and enhanced it. + +If you make some really cool modification to this stuff, or if you +fix a bug, please let me know. + +George Anzinger + + +Amit S. Kale + + +(First kgdb by David Grothe ) + +(modified by Tigran Aivazian ) + Putting gdbstub into the kernel config menu. + +(modified by Scott Foehner ) + Hooks for entering gdbstub at boot time. + +(modified by Amit S. Kale ) + Threads, ia-32 hw debugging, mp support, console support, + nmi watchdog handling. + +(modified by George Anzinger ) + Extended threads to include the idle threads. + Enhancements to allow breakpoint() at first C code. + Use of module_init() and __setup() to automate the configure. + Enhanced the cpu "collection" code to work in early bring-up. + Added ability to call functions from gdb + Print info thread stuff without going back to schedule() + Now collect the "other" cpus with an IPI/ NMI. --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/loadmodule.sh 2004-02-12 00:39:45.000000000 -0800 @@ -0,0 +1,78 @@ +#/bin/sh +# This script loads a module on a target machine and generates a gdb script. +# source generated gdb script to load the module file at appropriate addresses +# in gdb. +# +# Usage: +# Loading the module on target machine and generating gdb script) +# [foo]$ loadmodule.sh +# +# Loading the module file into gdb +# (gdb) source +# +# Modify following variables according to your setup. +# TESTMACHINE - Name of the target machine +# GDBSCRIPTS - The directory where a gdb script will be generated +# +# Author: Amit S. Kale (akale@veritas.com). +# +# If you run into problems, please check files pointed to by following +# variables. +# ERRFILE - /tmp/.errs contains stderr output of insmod +# MAPFILE - /tmp/.map contains stdout output of insmod +# GDBSCRIPT - $GDBSCRIPTS/load gdb script. + +TESTMACHINE=foo +GDBSCRIPTS=/home/bar + +if [ $# -lt 1 ] ; then { + echo Usage: $0 modulefile + exit +} ; fi + +MODULEFILE=$1 +MODULEFILEBASENAME=`basename $1` + +if [ $MODULEFILE = $MODULEFILEBASENAME ] ; then { + MODULEFILE=`pwd`/$MODULEFILE +} fi + +ERRFILE=/tmp/$MODULEFILEBASENAME.errs +MAPFILE=/tmp/$MODULEFILEBASENAME.map +GDBSCRIPT=$GDBSCRIPTS/load$MODULEFILEBASENAME + +function findaddr() { + local ADDR=0x$(echo "$SEGMENTS" | \ + grep "$1" | sed 's/^[^ ]*[ ]*[^ ]*[ ]*//' | \ + sed 's/[ ]*[^ ]*$//') + echo $ADDR +} + +function checkerrs() { + if [ "`cat $ERRFILE`" != "" ] ; then { + cat $ERRFILE + exit + } fi +} + +#load the module +echo Copying $MODULEFILE to $TESTMACHINE +rcp $MODULEFILE root@${TESTMACHINE}: + +echo Loading module $MODULEFILE +rsh -l root $TESTMACHINE /sbin/insmod -m ./`basename $MODULEFILE` \ + > $MAPFILE 2> $ERRFILE +checkerrs + +SEGMENTS=`head -n 11 $MAPFILE | tail -n 10` +TEXTADDR=$(findaddr "\\.text[^.]") +LOADSTRING="add-symbol-file $MODULEFILE $TEXTADDR" +SEGADDRS=`echo "$SEGMENTS" | awk '//{ + if ($1 != ".text" && $1 != ".this" && + $1 != ".kstrtab" && $1 != ".kmodtab") { + print " -s " $1 " 0x" $3 " " + } +}'` +LOADSTRING="$LOADSTRING $SEGADDRS" +echo Generating script $GDBSCRIPT +echo $LOADSTRING > $GDBSCRIPT --- linux-2.6.3-rc2/Documentation/ide.txt 2003-10-17 15:58:03.000000000 -0700 +++ 25/Documentation/ide.txt 2004-02-12 00:40:35.000000000 -0800 @@ -198,12 +198,11 @@ drivers can always be compiled as loadab can only be compiled into the kernel, and the core code (ide.c) can be compiled as a loadable module provided no chipset support is needed. -When using ide.c/ide-tape.c as modules in combination with kerneld, add: +When using ide.c as a module in combination with kmod, add: alias block-major-3 ide-probe - alias char-major-37 ide-tape -respectively to /etc/modules.conf. +to /etc/modprobe.conf. When ide.c is used as a module, you can pass command line parameters to the driver using the "options=" keyword to insmod, while replacing any ',' with --- linux-2.6.3-rc2/Documentation/kbuild/modules.txt 2004-01-09 00:04:30.000000000 -0800 +++ 25/Documentation/kbuild/modules.txt 2004-02-12 00:41:10.000000000 -0800 @@ -17,12 +17,52 @@ out-of-the-box support for installation Compiling modules outside the official kernel --------------------------------------------- -Often modules are developed outside the official kernel. -To keep up with changes in the build system the most portable way -to compile a module outside the kernel is to use the following command-line: + +Often modules are developed outside the official kernel. To keep up +with changes in the build system the most portable way to compile a +module outside the kernel is to use the kernel build system, +kbuild. Use the following command-line: make -C path/to/kernel/src SUBDIRS=$PWD modules This requires that a makefile exits made in accordance to -Documentation/kbuild/makefiles.txt. +Documentation/kbuild/makefiles.txt. Read that file for more details on +the build system. + +The following is a short summary of how to write your Makefile to get +you up and running fast. Assuming your module will be called +yourmodule.ko, your code should be in yourmodule.c and your Makefile +should include + +obj-m := yourmodule.o + +If the code for your module is in multiple files that need to be +linked, you need to tell the build system which files to compile. In +the case of multiple files, none of these files can be named +yourmodule.c because doing so would cause a problem with the linking +step. Assuming your code exists in file1.c, file2.c, and file3.c and +you want to build yourmodule.ko from them, your Makefile should +include + +obj-m := yourmodule.o +yourmodule-objs := file1.o file2.o file3.o + +Now for a final example to put it all together. Assuming the +KERNEL_SOURCE environment variable is set to the directory where you +compiled the kernel, a simple Makefile that builds yourmodule.ko as +described above would look like + +# Tells the build system to build yourmodule.ko. +obj-m := yourmodule.o + +# Tells the build system to build these object files and link them as +# yourmodule.o, before building yourmodule.ko. This line can be left +# out if all the code for your module is in one file, yourmodule.c. If +# you are using multiple files, none of these files can be named +# yourmodule.c. +yourmodule-objs := file1.o file2.o file3.o +# Invokes the kernel build system to come back to the current +# directory and build yourmodule.ko. +default: + make -C ${KERNEL_SOURCE} SUBDIRS=`pwd` modules --- linux-2.6.3-rc2/Documentation/kernel-parameters.txt 2004-02-03 20:42:34.000000000 -0800 +++ 25/Documentation/kernel-parameters.txt 2004-02-12 00:41:12.000000000 -0800 @@ -174,11 +174,18 @@ running once the system is up. atascsi= [HW,SCSI] Atari SCSI - atkbd.set= [HW] Select keyboard code set - Format: + atkbd.extra= [HW] Enable extra LEDs and keys on IBM RapidAccess, EzKey + and similar keyboards + + atkbd.reset= [HW] Reset keyboard during initialization + + atkbd.set= [HW] Select keyboard code set + Format: (2 = AT (default) 3 = PS/2) + + atkbd.scroll= [HW] Enable scroll wheel on MS Office and similar keyboards + atkbd.softrepeat= [HW] Use software keyboard repeat - atkbd.reset= [HW] Reset keyboard during initialization autotest [IA64] @@ -237,7 +244,7 @@ running once the system is up. Forces specified timesource (if avaliable) to be used when calculating gettimeofday(). If specicified timesource is not avalible, it defaults to PIT. - Format: { pit | tsc | cyclone | ... } + Format: { pit | tsc | cyclone | pmtmr } hpet= [IA-32,HPET] option to disable HPET and use PIT. Format: disable @@ -310,6 +317,23 @@ running once the system is up. dtc3181e= [HW,SCSI] + earlyprintk= [x86, x86_64] + early_printk=vga + early_printk=serial[,ttySn[,baudrate]] + + Append ,keep to not disable it when the real console + takes over. + + Only vga or serial at a time, not both. + + Currently only ttyS0 and ttyS1 are supported. + + Interaction with the standard serial driver is not + very good. + + The VGA output is eventually overwritten by the real + console. + eata= [HW,SCSI] eda= [HW,PS2] @@ -387,6 +411,14 @@ running once the system is up. hd?= [HW] (E)IDE subsystem hd?lun= See Documentation/ide.txt. + highmem=size[KMG] [IA32,KNL,BOOT] forces highmem to be at most 'size' bytes. + This works even on boxes with at least 72MB RAM that have no + highmem otherwise. This also works to reduce highmem size on + bigger boxes. + + Note: highmem is adjusted downward for proper zone alignment + of the highmem physical start address + hisax= [HW,ISDN] See Documentation/isdn/README.HiSax. --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/laptop-mode.txt 2004-02-12 00:40:12.000000000 -0800 @@ -0,0 +1,480 @@ +How to conserve battery power using laptop-mode +----------------------------------------------- + +Document Author: Bart Samwel (bart@samwel.tk) +Date created: January 2, 2004 + +Introduction +------------ + +Laptopmode is used to minimize the time that the hard disk needs to be spun up, +to conserve battery power on laptops. It has been reported to cause significant +power savings. + +Contents +-------- + +* Introduction +* The short story +* Caveats +* The details +* Tips & Tricks +* Control script +* ACPI integration +* Monitoring tool + + +The short story +--------------- + +If you just want to use it, run the laptop_mode control script (which is included +at the end of this document) as follows: + +# laptop_mode start + +Then set your harddisk spindown time to a relatively low value with hdparm: + +hdparm -S 4 /dev/hda + +The value -S 4 means 20 seconds idle time before spindown. Your harddisk will +now only spin up when a disk cache miss occurs, or at least once every 10 +minutes to write back any pending changes. + +To stop laptop_mode, remount your filesystems with regular commit intervals +(e.g., 5 seconds), and run "laptop_mode stop". + + +Caveats +------- + +* The downside of laptop mode is that you have a chance of losing up + to 10 minutes of work. If you cannot afford this, don't use it! + +* Most desktop hard drives have a very limited lifetime measured in spindown + cycles, typically about 50.000 times (it's usually listed on the spec sheet). + Check your drive's rating, and don't wear down your drive's lifetime if you + don't need to. + +* If you mount some of your ext3/reiserfs filesystems with the -n option, then + the control script will not be able to remount them correctly. You must set + DO_REMOUNTS=0 in the control script, otherwise it will remount them with the + wrong options -- or it will fail because it cannot write to /etc/mtab. + +* If you have your filesystems listed as type "auto" in fstab, like I did, then + the control script will not recognize them as filesystems that need remounting. + +The details +----------- + +Laptop-mode is controlled by the flag /proc/sys/vm/laptop_mode. When this +flag is set, any physical disk read operation (that might have caused the +hard disk to spin up) causes Linux to flush all dirty blocks. The result +of this is that after a disk has spun down, it will not be spun up anymore +to write dirty blocks, because those blocks had already been written +immediately after the most recent read operation + +To increase the effectiveness of the laptop_mode strategy, the laptop_mode +control script increases dirty_expire_centisecs and dirty_writeback_centisecs in +/proc/sys/vm to about 10 minutes (by default), which means that pages that are +dirtied are not forced to be written to disk as often. The control script also +changes the dirty background ratio, so that background writeback of dirty pages +is not done anymore. Combined with a higher commit value (also 10 minutes) for +ext3 or ReiserFS filesystems (also done automatically by the control script), +this results in concentration of disk activity in a small time interval which +occurs only once every 10 minutes, or whenever the disk is forced to spin up by +a cache miss. The disk can then be spun down in the periods of inactivity. + +If you want to find out which process caused the disk to spin up, you can +gather information by setting the flag /proc/sys/vm/block_dump. When this flag +is set, Linux reports all disk read and write operations that take place, and +all block dirtyings done to files. This makes it possible to debug why a disk +needs to spin up, and to increase battery life even more. + +If 10 minutes is too much or too little downtime for you, you can configure +this downtime as follows. In the control script, set the MAX_AGE value to the +maximum number of seconds of disk downtime that you would like. You should +then set your filesystem's commit interval to the same value. The dirty ratio +is also configurable from the control script. + +If you don't like the idea of the control script remounting your filesystems +for you, you can change DO_REMOUNTS to 0 in the script. + +Thanks to Kiko Piris, the control script can be used to enable laptop mode on +both the Linux 2.4 and 2.6 series. + + +Tips & Tricks +------------- + +* Bartek Kania reports getting up to 50 minutes of extra battery life (on top + of his regular 3 to 3.5 hours) using very aggressive power management (hdparm + -B1) and a spindown time of 5 seconds (hdparm -S1). + +* You can spin down the disk while playing MP3, by setting the disk readahead + to 8MB (hdparm -a 16384). Effectively, the disk will read a complete MP3 at + once, and will then spin down while the MP3 is playing. (Thanks to Bartek + Kania.) + +* Drew Scott Daniels observed: "I don't know why, but when I decrease the number + of colours that my display uses it consumes less battery power. I've seen + this on powerbooks too. I hope that this is a piece of information that + might be useful to the Laptop Mode patch or it's users." + + +Control script +-------------- + +Please note that this control script works for the Linux 2.4 and 2.6 series. + +--------------------CONTROL SCRIPT BEGIN------------------------------------------ +#!/bin/sh + +# start or stop laptop_mode, best run by a power management daemon when +# ac gets connected/disconnected from a laptop +# +# install as /sbin/laptop_mode +# +# Contributors to this script: Kiko Piris +# Bart Samwel +# Dax Kelson +# Original Linux 2.4 version by: Jens Axboe + +parse_mount_opts () { + echo "$*" | \ + sed 's/commit=[0-9]*//g' | \ + sed 's/,,*/,/g' | \ + sed 's/^,//' | \ + sed 's/,$//' | \ + cat - +} + +KLEVEL="$(uname -r | cut -c1-3)" +case "$KLEVEL" in + "2.4") + true + ;; + "2.6") + true + ;; + *) + echo "Unhandled kernel level: $KLEVEL ('uname -r' = '$(uname -r)')" + exit 1 + ;; +esac + +# Shall we remount journaled fs. with appropiate commit interval? (1=yes) +DO_REMOUNTS=1 + +# age time, in seconds. should be put into a sysconfig file +MAX_AGE=600 + +# Allowed dirty ratio, in pct. should be put into a sysconfig file as well. +DIRTY_RATIO=40 + +# kernel default dirty buffer age +DEF_AGE=30 +DEF_UPDATE=5 +DEF_DIRTY_BACKGROUND_RATIO=10 +DEF_DIRTY_RATIO=40 + + +if [ ! -e /proc/sys/vm/laptop_mode ]; then + echo "Kernel is not patched with laptop_mode patch." + exit 1 +fi + +if [ ! -w /proc/sys/vm/laptop_mode ]; then + echo "You do not have enough privileges to enable laptop_mode." + exit 1 +fi + +case "$1" in + start) + AGE=$((100*$MAX_AGE)) + echo -n "Starting laptop_mode" + case "$KLEVEL" in + "2.4") + echo "1" > /proc/sys/vm/laptop_mode + echo "30 500 0 0 $AGE $AGE 60 20 0" > /proc/sys/vm/bdflush + ;; + "2.6") + echo "1" > /proc/sys/vm/laptop_mode + echo "$AGE" > /proc/sys/vm/dirty_writeback_centisecs + echo "$AGE" > /proc/sys/vm/dirty_expire_centisecs + echo "$DIRTY_RATIO" > /proc/sys/vm/dirty_ratio + echo "$DIRTY_RATIO" > /proc/sys/vm/dirty_background_ratio + ;; + esac + if [ $DO_REMOUNTS -eq 1 ]; then + cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do + PARSEDOPTS="$(parse_mount_opts "$OPTS")" + case "$FST" in + "ext3") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE ;; + "reiserfs") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE ;; + "xfs") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE ;; + esac + done + fi + echo "." + ;; + stop) + U_AGE=$((100*$DEF_UPDATE)) + B_AGE=$((100*$DEF_AGE)) + echo -n "Stopping laptop_mode" + case "$KLEVEL" in + "2.4") + echo "0" > /proc/sys/vm/laptop_mode + echo "30 500 0 0 $U_AGE $B_AGE 60 20 0" > /proc/sys/vm/bdflush + ;; + "2.6") + echo "0" > /proc/sys/vm/laptop_mode + echo "$U_AGE" > /proc/sys/vm/dirty_writeback_centisecs + echo "$B_AGE" > /proc/sys/vm/dirty_expire_centisecs + echo "$DEF_DIRTY_RATIO" > /proc/sys/vm/dirty_ratio + echo "$DEF_DIRTY_BACKGROUND_RATIO" > /proc/sys/vm/dirty_background_ratio + ;; + esac + if [ $DO_REMOUNTS -eq 1 ]; then + cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do + PARSEDOPTS="$(parse_mount_opts "$OPTS")" + case "$FST" in + "ext3") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS ;; + "reiserfs") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS ;; + "xfs") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS ;; + esac + done + fi + echo "." + ;; + *) + echo "$0 {start|stop}" + ;; + +esac + +exit 0 + +--------------------CONTROL SCRIPT END-------------------------------------------- + + +ACPI integration +---------------- + +Dax Kelson submitted this so that the ACPI acpid daemon will +kick off the laptop_mode script and run hdparm. + +---------------------------/etc/acpi/events/ac_adapter BEGIN------------------------------------------- +event=ac_adapter +action=/etc/acpi/actions/battery.sh +---------------------------/etc/acpi/events/ac_adapter END------------------------------------------- + +---------------------------/etc/acpi/actions/battery.sh BEGIN------------------------------------------- +#!/bin/sh + +# cpu throttling +# cat /proc/acpi/processor/CPU0/throttling for more info +ACAD_THR=0 +BATT_THR=2 + +# spindown time for HD (man hdparm for valid values) +# I prefer 2 hours for acad and 20 seconds for batt +ACAD_HD=244 +BATT_HD=4 + +# ac/battery event handler + +status=`awk '/^state: / { print $2 }' /proc/acpi/ac_adapter/AC/state` + +case $status in + "on-line") + echo "Setting HD spindown to 2 hours" + /sbin/laptop-mode stop + /sbin/hdparm -S $ACAD_HD /dev/hda > /dev/null 2>&1 + /sbin/hdparm -B 255 /dev/hda > /dev/null 2>&1 + #echo -n $ACAD_CPU:$ACAD_THR > /proc/acpi/processor/CPU0/limit + exit 0 + ;; + "off-line") + echo "Setting HD spindown to 20 seconds" + /sbin/laptop-mode start + /sbin/hdparm -S $BATT_HD /dev/hda > /dev/null 2>&1 + /sbin/hdparm -B 1 /dev/hda > /dev/null 2>&1 + #echo -n $BATT_CPU:$BATT_THR > /proc/acpi/processor/CPU0/limit + exit 0 + ;; +esac +---------------------------/etc/acpi/actions/battery.sh END------------------------------------------- + +Monitoring tool +--------------- + +Bartek Kania submitted this, it can be used to measure how much time your disk +spends spun up/down. + +---------------------------dslm.c BEGIN------------------------------------------- +/* + * Simple Disk SLeep Monitor + * by Bartek Kania + * Licenced under the GPL + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define D(x) x +#else +#define D(x) +#endif + +int endit = 0; + +/* Check if the disk is in powersave-mode + * Most of the code is stolen from hdparm. + * 1 = active, 0 = standby/sleep, -1 = unknown */ +int check_powermode(int fd) +{ + unsigned char args[4] = {WIN_CHECKPOWERMODE1,0,0,0}; + int state; + + if (ioctl(fd, HDIO_DRIVE_CMD, &args) + && (args[0] = WIN_CHECKPOWERMODE2) /* try again with 0x98 */ + && ioctl(fd, HDIO_DRIVE_CMD, &args)) { + if (errno != EIO || args[0] != 0 || args[1] != 0) { + state = -1; /* "unknown"; */ + } else + state = 0; /* "sleeping"; */ + } else { + state = (args[2] == 255) ? 1 : 0; + } + D(printf(" drive state is: %s\n", state)); + + return state; +} + +char *state_name(int i) +{ + if (i == -1) return "unknown"; + if (i == 0) return "sleeping"; + if (i == 1) return "active"; + + return "internal error"; +} + +char *myctime(time_t time) +{ + char *ts = ctime(&time); + ts[strlen(ts) - 1] = 0; + + return ts; +} + +void measure(int fd) +{ + time_t start_time; + int last_state; + time_t last_time; + int curr_state; + time_t curr_time = 0; + time_t time_diff; + time_t active_time = 0; + time_t sleep_time = 0; + time_t unknown_time = 0; + time_t total_time = 0; + int changes = 0; + float tmp; + + printf("Starting measurements\n"); + + last_state = check_powermode(fd); + start_time = last_time = time(0); + printf(" System is in state %s\n\n", state_name(last_state)); + + while(!endit) { + sleep(1); + curr_state = check_powermode(fd); + + if (curr_state != last_state || endit) { + changes++; + curr_time = time(0); + time_diff = curr_time - last_time; + + if (last_state == 1) active_time += time_diff; + else if (last_state == 0) sleep_time += time_diff; + else unknown_time += time_diff; + + last_state = curr_state; + last_time = curr_time; + + printf("%s: State-change to %s\n", myctime(curr_time), + state_name(curr_state)); + } + } + changes--; /* Compensate for SIGINT */ + + total_time = time(0) - start_time; + printf("\nTotal running time: %lus\n", curr_time - start_time); + printf(" State changed %d times\n", changes); + + tmp = (float)sleep_time / (float)total_time * 100; + printf(" Time in sleep state: %lus (%.2f%%)\n", sleep_time, tmp); + tmp = (float)active_time / (float)total_time * 100; + printf(" Time in active state: %lus (%.2f%%)\n", active_time, tmp); + tmp = (float)unknown_time / (float)total_time * 100; + printf(" Time in unknown state: %lus (%.2f%%)\n", unknown_time, tmp); +} + +void ender(int s) +{ + endit = 1; +} + +void usage() +{ + puts("usage: dslm [-w