--- linux-2.6.2-rc1/arch/alpha/kernel/signal.c 2003-06-14 12:18:20.000000000 -0700 +++ 25/arch/alpha/kernel/signal.c 2004-01-21 20:11:14.000000000 -0800 @@ -201,10 +201,13 @@ sys_sigaltstack(const stack_t *uss, stac * Do a signal return; undo the signal stack. */ +#if _NSIG_WORDS > 1 +# error "Non SA_SIGINFO frame needs rearranging" +#endif + struct sigframe { struct sigcontext sc; - unsigned long extramask[_NSIG_WORDS-1]; unsigned int retcode[3]; }; @@ -268,19 +271,20 @@ restore_sigcontext(struct sigcontext *sc return err; } +/* Note that this syscall is also used by setcontext(3) to install + a given sigcontext. This because it's impossible to set *all* + registers and transfer control from userland. */ + asmlinkage void -do_sigreturn(struct sigframe *frame, struct pt_regs *regs, +do_sigreturn(struct sigcontext *sc, struct pt_regs *regs, struct switch_stack *sw) { sigset_t set; /* Verify that it's a good sigcontext before using it */ - if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + if (verify_area(VERIFY_READ, sc, sizeof(*sc))) goto give_sigsegv; - if (__get_user(set.sig[0], &frame->sc.sc_mask) - || (_NSIG_WORDS > 1 - && __copy_from_user(&set.sig[1], &frame->extramask, - sizeof(frame->extramask)))) + if (__get_user(set.sig[0], &sc->sc_mask)) goto give_sigsegv; sigdelsetmask(&set, ~_BLOCKABLE); @@ -289,7 +293,7 @@ do_sigreturn(struct sigframe *frame, str recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - if (restore_sigcontext(&frame->sc, regs, sw)) + if (restore_sigcontext(sc, regs, sw)) goto give_sigsegv; /* Send SIGTRAP if we're single-stepping: */ @@ -314,10 +318,9 @@ do_rt_sigreturn(struct rt_sigframe *fram struct switch_stack *sw) { sigset_t set; - stack_t st; - /* Verify that it's a good sigcontext before using it */ - if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + /* Verify that it's a good ucontext_t before using it */ + if (verify_area(VERIFY_READ, &frame->uc, sizeof(frame->uc))) goto give_sigsegv; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto give_sigsegv; @@ -331,12 +334,6 @@ do_rt_sigreturn(struct rt_sigframe *fram if (restore_sigcontext(&frame->uc.uc_mcontext, regs, sw)) goto give_sigsegv; - if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) - goto give_sigsegv; - /* It is more difficult to avoid calling this function than to - call it and ignore errors. */ - do_sigaltstack(&st, NULL, rdusp()); - /* Send SIGTRAP if we're single-stepping: */ if (ptrace_cancel_bpt (current)) { siginfo_t info; @@ -437,10 +434,6 @@ setup_frame(int sig, struct k_sigaction goto give_sigsegv; err |= setup_sigcontext(&frame->sc, regs, sw, set->sig[0], oldsp); - if (_NSIG_WORDS > 1) { - err |= __copy_to_user(frame->extramask, &set->sig[1], - sizeof(frame->extramask)); - } if (err) goto give_sigsegv; --- linux-2.6.2-rc1/arch/cris/arch-v10/drivers/ethernet.c 2003-07-10 18:50:30.000000000 -0700 +++ 25/arch/cris/arch-v10/drivers/ethernet.c 2004-01-21 23:26:52.000000000 -0800 @@ -482,7 +482,7 @@ etrax_ethernet_init(void) /* Register device */ err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.2-rc1/arch/i386/boot/compressed/misc.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/boot/compressed/misc.c 2004-01-21 23:30:39.000000000 -0800 @@ -104,7 +104,7 @@ static unsigned long output_ptr = 0; static void *malloc(int size); static void free(void *where); -static void puts(const char *); +static void putstr(const char *); extern int end; static long free_mem_ptr = (long)&end; @@ -169,7 +169,7 @@ static void scroll(void) vidmem[i] = ' '; } -static void puts(const char *s) +static void putstr(const char *s) { int x,y,pos; char c; @@ -287,9 +287,9 @@ static void flush_window(void) static void error(char *x) { - puts("\n\n"); - puts(x); - puts("\n\n -- System halted"); + putstr("\n\n"); + putstr(x); + putstr("\n\n -- System halted"); while(1); /* Halt */ } @@ -373,9 +373,9 @@ asmlinkage int decompress_kernel(struct else setup_output_buffer_if_we_run_high(mv); makecrc(); - puts("Uncompressing Linux... "); + putstr("Uncompressing Linux... "); gunzip(); - puts("Ok, booting the kernel.\n"); + putstr("Ok, booting the kernel.\n"); if (high_loaded) close_output_buffer_if_we_run_high(mv); return high_loaded; } --- linux-2.6.2-rc1/arch/i386/boot/setup.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/boot/setup.S 2004-01-22 00:01:18.000000000 -0800 @@ -49,6 +49,8 @@ * by Matt Domsch October 2002 * conformant to T13 Committee www.t13.org * projects 1572D, 1484D, 1386D, 1226DT + * disk signature read by Matt Domsch + * and Andrew Wilks September 2003 */ #include @@ -162,7 +164,7 @@ cmd_line_ptr: .long 0 # (Header versio # can be located anywhere in # low memory 0x10000 or higher. -ramdisk_max: .long MAXMEM-1 # (Header version 0x0203 or later) +ramdisk_max: .long __MAXMEM-1 # (Header version 0x0203 or later) # The highest safe address for # the contents of an initrd @@ -578,6 +580,25 @@ done_apm_bios: #endif #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) +# Read the first sector of device 80h and store the 4-byte signature + movl $0xFFFFFFFF, %eax + movl %eax, (DISK80_SIG_BUFFER) # assume failure + movb $READ_SECTORS, %ah + movb $1, %al # read 1 sector + movb $0x80, %dl # from device 80 + movb $0, %dh # at head 0 + movw $1, %cx # cylinder 0, sector 0 + pushw %es + pushw %ds + popw %es + movw $EDDBUF, %bx + int $0x13 + jc disk_sig_done + movl (EDDBUF+MBR_SIG_OFFSET), %eax + movl %eax, (DISK80_SIG_BUFFER) # store success +disk_sig_done: + popw %es + # Do the BIOS Enhanced Disk Drive calls # This consists of two calls: # int 13h ah=41h "Check Extensions Present" @@ -755,7 +776,7 @@ end_move_self: # now we are at the r # AMD Elan bug fix by Robert Schwebel. # -#if defined(CONFIG_MELAN) +#if defined(CONFIG_X86_ELAN) movb $0x02, %al # alternate A20 gate outb %al, $0x92 # this works on SC410/SC520 a20_elan_wait: --- linux-2.6.2-rc1/arch/i386/Kconfig 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/i386/Kconfig 2004-01-22 00:01:18.000000000 -0800 @@ -43,6 +43,15 @@ config X86_PC help Choose this option if your computer is a standard PC or compatible. +config X86_ELAN + bool "AMD Elan" + help + Select this for an AMD Elan processor. + + Do not use this option for K6/Athlon/Opteron processors! + + If unsure, choose "PC-compatible" instead. + config X86_VOYAGER bool "Voyager (NCR)" help @@ -130,278 +139,318 @@ config ES7000_CLUSTERED_APIC default y depends on SMP && X86_ES7000 && MPENTIUMIII -choice - prompt "Processor family" - default M686 +if !X86_ELAN -config M386 - bool "386" - ---help--- - This is the processor type of your CPU. This information is used for - optimizing purposes. In order to compile a kernel that can run on - all x86 CPU types (albeit not optimally fast), you can specify - "386" here. - - The kernel will not necessarily run on earlier architectures than - the one you have chosen, e.g. a Pentium optimized kernel will run on - a PPro, but not necessarily on a i486. - - Here are the settings recommended for greatest speed: - - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI - 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels - will run on a 386 class machine. - - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or - SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. - - "586" for generic Pentium CPUs lacking the TSC - (time stamp counter) register. - - "Pentium-Classic" for the Intel Pentium. - - "Pentium-MMX" for the Intel Pentium MMX. - - "Pentium-Pro" for the Intel Pentium Pro. - - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron. - - "Pentium-III" for the Intel Pentium III or Coppermine Celeron. - - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron. - - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D). - - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird). - - "Crusoe" for the Transmeta Crusoe series. - - "Winchip-C6" for original IDT Winchip. - - "Winchip-2" for IDT Winchip 2. - - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. - - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. - - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). +menu "Processor support" - If you don't know what to do, choose "386". +comment "Select all processors your kernel should support" -config M486 +config CPU_386 + bool "386" + default n + help + Select this for a 386 series processor. + +config CPU_486 bool "486" + default y help Select this for a 486 series processor, either Intel or one of the compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX, DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. -config M586 +config CPU_586 bool "586/K5/5x86/6x86/6x86MX" + default y help - Select this for an 586 or 686 series processor such as the AMD K5, - the Intel 5x86 or 6x86, or the Intel 6x86MX. This choice does not - assume the RDTSC (Read Time Stamp Counter) instruction. + Select this for a non-Intel 586 or 686 series processor such as + the AMD K5 or the Cyrix 6x86MX. + + Several CPUs that have their own options below (e.g. AMD K6, + Duron, Athlon and Opteeron, IDT Winchip, Cyrix III and + VIA C3) do _not_ need this option. + + This choice does not assume the RDTSC (Read Time Stamp Counter) + instruction. -config M586TSC +config CPU_586TSC bool "Pentium-Classic" + default y help Select this for a Pentium Classic processor with the RDTSC (Read - Time Stamp Counter) instruction for benchmarking. + Time Stamp Counter) instruction. -config M586MMX +config CPU_586MMX bool "Pentium-MMX" + default y help Select this for a Pentium with the MMX graphics/multimedia extended instructions. -config M686 +config CPU_686 bool "Pentium-Pro" + default y help - Select this for Intel Pentium Pro chips. This enables the use of - Pentium Pro extended instructions, and disables the init-time guard - against the f00f bug found in earlier Pentiums. + Select this for Intel Pentium Pro chips. -config MPENTIUMII +config CPU_PENTIUMII bool "Pentium-II/Celeron(pre-Coppermine)" + default y help Select this for Intel chips based on the Pentium-II and - pre-Coppermine Celeron core. This option enables an unaligned - copy optimization, compiles the kernel with optimization flags - tailored for the chip, and applies any applicable Pentium Pro - optimizations. + pre-Coppermine Celeron core. -config MPENTIUMIII +config CPU_PENTIUMIII bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" + default y help Select this for Intel chips based on the Pentium-III and - Celeron-Coppermine core. This option enables use of some - extended prefetch instructions in addition to the Pentium II - extensions. - -config MPENTIUM4 - bool "Pentium-4/Celeron(P4-based)/Xeon" - help - Select this for Intel Pentium 4 chips. This includes both - the Pentium 4 and P4-based Celeron chips. This option - enables compile flags optimized for the chip, uses the - correct cache shift, and applies any applicable Pentium III - optimizations. + Celeron-Coppermine core. + +config CPU_PENTIUMM + bool "Pentium M" + default y + help + Select this for Intel Pentium M (not Pentium-4 M) + notebook chips. -config MK6 +config CPU_PENTIUM4 + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon" + default y + help + Select this for Intel Pentium 4 chips. This includes + the Pentium 4, P4-based Celeron and Xeon, and + Pentium-4 M (not Pentium M) chips. + +config CPU_K6 bool "K6/K6-II/K6-III" + default y help - Select this for an AMD K6-family processor. Enables use of - some extended instructions, and passes appropriate optimization - flags to GCC. + Select this for an AMD K6, K6-II or K6-III (aka K6-3D). -config MK7 +config CPU_K7 bool "Athlon/Duron/K7" + default y help - Select this for an AMD Athlon K7-family processor. Enables use of - some extended instructions, and passes appropriate optimization - flags to GCC. + Select this for an AMD Athlon K7-family processor. -config MK8 +config CPU_K8 bool "Opteron/Athlon64/Hammer/K8" + default y help - Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables - use of some extended instructions, and passes appropriate optimization - flags to GCC. - -config MELAN - bool "Elan" + Select this for an AMD Opteron or Athlon64 Hammer-family processor. -config MCRUSOE +config CPU_CRUSOE bool "Crusoe" + default y help - Select this for a Transmeta Crusoe processor. Treats the processor - like a 586 with TSC, and sets some GCC optimization flags (like a - Pentium Pro with no alignment requirements). + Select this for a Transmeta Crusoe processor. -config MWINCHIPC6 +config CPU_WINCHIPC6 bool "Winchip-C6" + default y help - Select this for an IDT Winchip C6 chip. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment requirements. + Select this for an IDT Winchip C6 chip. -config MWINCHIP2 +config CPU_WINCHIP2 bool "Winchip-2" + default y help - Select this for an IDT Winchip-2. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment requirements. + Select this for an IDT Winchip-2. -config MWINCHIP3D +config CPU_WINCHIP3D bool "Winchip-2A/Winchip-3" + default y help - Select this for an IDT Winchip-2A or 3. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment reqirements. Also enable out of order memory - stores for this CPU, which can increase performance of some - operations. - -config MCYRIXIII - bool "CyrixIII/VIA-C3" - help - Select this for a Cyrix III or C3 chip. Presently Linux and GCC - treat this chip as a generic 586. Whilst the CPU is 686 class, - it lacks the cmov extension which gcc assumes is present when - generating 686 code. - Note that Nehemiah (Model 9) and above will not boot with this - kernel due to them lacking the 3DNow! instructions used in earlier - incarnations of the CPU. + Select this for an IDT Winchip-2A or 3 with 3dNow! + capabilities. + +config CPU_CYRIXIII + bool "Cyrix III/VIA C3" + default y + help + Select this for a Cyrix III or VIA C3 chip. + + Note that Nehemiah (Model 9) and above need the next + option instead. -config MVIAC3_2 +config CPU_VIAC3_2 bool "VIA C3-2 (Nehemiah)" + default y help - Select this for a VIA C3 "Nehemiah". Selecting this enables usage - of SSE and tells gcc to treat the CPU as a 686. - Note, this kernel will not boot on older (pre model 9) C3s. + Select this for a VIA C3 "Nehemiah" (model 9 and above). -endchoice +endmenu -config X86_GENERIC - bool "Generic x86 support" - help - Including some tuning for non selected x86 CPUs too. - when it has moderate overhead. This is intended for generic - distributions kernels. +endif + +# +# helper options +# +config CPU_INTEL + bool + depends on CPU_386 || CPU_486 || CPU_586TSC || CPU_686 || CPU_PENTIUMII || CPU_PENTIUMIII || CPU_PENTIUMM || CPU_PENTIUM4 + default y + +config CPU_WINCHIP + bool + depends on CPU_WINCHIPC6 || CPU_WINCHIP2 || CPU_WINCHIP3D + default y + +config CPU_ONLY_K7 + bool + depends on CPU_K7 && !CPU_INTEL && !CPU_K6 && !CPU_K8 && !X86_ELAN && !CPU_CRUSOE && !CPU_WINCHIP && !CPU_CYRIXIII && !CPU_VIAC3_2 + default y + +config CPU_ONLY_K8 + bool + depends on CPU_K8 && !CPU_INTEL && !CPU_K6 && !CPU_K7 && !X86_ELAN && !CPU_CRUSOE && !CPU_WINCHIP && !CPU_CYRIXIII && !CPU_VIAC3_2 + default y + +config CPU_ONLY_WINCHIP + bool + depends on CPU_WINCHIP && !CPU_INTEL && !CPU_K6 && !CPU_K7 && !CPU_K8 && !X86_ELAN && !CPU_CRUSOE && !CPU_CYRIXIII && !CPU_VIAC3_2 + default y # # Define implied options from the CPU selection here # config X86_CMPXCHG bool - depends on !M386 + depends on !CPU_386 default y config X86_XADD bool - depends on !M386 + depends on !CPU_386 default y config X86_L1_CACHE_SHIFT int - default "7" if MPENTIUM4 || X86_GENERIC - default "4" if MELAN || M486 || M386 - default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 - default "6" if MK7 || MK8 + default "7" if CPU_PENTIUM4 + default "6" if CPU_K7 || CPU_K8 || CPU_PENTIUMM + default "5" if CPU_WINCHIP || CPU_CRUSOE || CPU_CYRIXIII || CPU_K6 || CPU_PENTIUMIII || CPU_PENTIUMII || CPU_686 || CPU_586MMX || CPU_586TSC || CPU_586 || CPU_VIAC3_2 + default "4" if X86_ELAN || CPU_486 || CPU_386 config RWSEM_GENERIC_SPINLOCK bool - depends on M386 + depends on CPU_386 default y config RWSEM_XCHGADD_ALGORITHM bool - depends on !M386 + depends on !CPU_386 default y config X86_PPRO_FENCE bool - depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 + depends on CPU_686 default y config X86_F00F_BUG bool - depends on M586MMX || M586TSC || M586 || M486 || M386 + depends on CPU_586MMX || CPU_586TSC default y config X86_WP_WORKS_OK bool - depends on !M386 + depends on !CPU_386 default y config X86_INVLPG bool - depends on !M386 + depends on !CPU_386 default y config X86_BSWAP bool - depends on !M386 + depends on !CPU_386 default y config X86_POPAD_OK bool - depends on !M386 + depends on !CPU_386 default y config X86_ALIGNMENT_16 bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 + depends on CPU_WINCHIP || CPU_CYRIXIII || X86_ELAN || CPU_K6 || CPU_586MMX || CPU_586TSC || CPU_586 || CPU_486 || CPU_VIAC3_2 default y -config X86_GOOD_APIC +config X86_BAD_APIC bool - depends on MK7 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 + depends on CPU_586TSC default y config X86_INTEL_USERCOPY bool - depends on MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 + depends on CPU_K7 || CPU_K8 || CPU_PENTIUMII || CPU_PENTIUMIII || CPU_PENTIUMM || CPU_PENTIUM4 default y config X86_USE_PPRO_CHECKSUM bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 + depends on !CPU_386 && !CPU_486 && !CPU_586 && !CPU_586TSC && !CPU_586MMX && !X86_ELAN && !CPU_CRUSOE default y config X86_USE_3DNOW bool - depends on MCYRIXIII || MK7 + depends on !CPU_INTEL && !CPU_K6 && !CPU_K8 && !X86_ELAN && !CPU_CRUSOE && !CPU_WINCHIP && !CPU_VIAC3_2 default y config X86_OOSTORE bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 + depends on CPU_ONLY_WINCHIP default y +config X86_4G + bool "4 GB kernel-space and 4 GB user-space virtual memory support" + help + This option is only useful for systems that have more than 1 GB + of RAM. + + The default kernel VM layout leaves 1 GB of virtual memory for + kernel-space mappings, and 3 GB of VM for user-space applications. + This option ups both the kernel-space VM and the user-space VM to + 4 GB. + + The cost of this option is additional TLB flushes done at + system-entry points that transition from user-mode into kernel-mode. + I.e. system calls and page faults, and IRQs that interrupt user-mode + code. There's also additional overhead to kernel operations that copy + memory to/from user-space. The overhead from this is hard to tell and + depends on the workload - it can be anything from no visible overhead + to 20-30% overhead. A good rule of thumb is to count with a runtime + overhead of 20%. + + The upside is the much increased kernel-space VM, which more than + quadruples the maximum amount of RAM supported. Kernels compiled with + this option boot on 64GB of RAM and still have more than 3.1 GB of + 'lowmem' left. Another bonus is that highmem IO bouncing decreases, + if used with drivers that still use bounce-buffers. + + There's also a 33% increase in user-space VM size - database + applications might see a boost from this. + + But the cost of the TLB flushes and the runtime overhead has to be + weighed against the bonuses offered by the larger VM spaces. The + dividing line depends on the actual workload - there might be 4 GB + systems that benefit from this option. Systems with less than 4 GB + of RAM will rarely see a benefit from this option - but it's not + out of question, the exact circumstances have to be considered. + +config X86_SWITCH_PAGETABLES + def_bool X86_4G + +config X86_4G_VM_LAYOUT + def_bool X86_4G + +config X86_UACCESS_INDIRECT + def_bool X86_4G + +config X86_HIGH_ENTRY + def_bool X86_4G + config HPET_TIMER bool "HPET Timer Support" help @@ -459,6 +508,16 @@ config NR_CPUS This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on SMP + default off + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with Intel Pentium 4 chips with HyperThreading at a + cost of slightly increased overhead in some places. If unsure say + N here. + config PREEMPT bool "Preemptible Kernel" help @@ -513,7 +572,7 @@ config X86_IO_APIC config X86_TSC bool - depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ + depends on !X86_NUMAQ && !CPU_386 && !CPU_486 && !CPU_586 && !X86_ELAN && !CPU_WINCHIPC6 default y config X86_MCE @@ -603,8 +662,6 @@ config MICROCODE To compile this driver as a module, choose M here: the module will be called microcode. - If you use modprobe or kmod you may also want to add the line - 'alias char-major-10-184 microcode' to your /etc/modules.conf file. config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" @@ -821,6 +878,19 @@ config BOOT_IOREMAP depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) default y +config REGPARM + bool "Use register arguments (EXPERIMENTAL)" + depends on EXPERIMENTAL + default n + help + Compile the kernel with -mregparm=3. This uses an different ABI + and passes the first three arguments of a function call in registers. + This will probably break binary only modules. + + This feature is only enabled for gcc-3.0 and later - earlier compilers + generate incorrect output with certain kernel constructs when + -mregparm=3 is used. + endmenu @@ -1231,6 +1301,15 @@ config DEBUG_PAGEALLOC This results in a large slowdown, but helps to find certain types of memory corruptions. +config SPINLINE + bool "Spinlock inlining" + depends on DEBUG_KERNEL + help + This will change spinlocks from out of line to inline, making them + account cost to the callers in readprofile, rather than the lock + itself (as ".text.lock.filename"). This can be helpful for finding + the callers of locks. + config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM @@ -1247,20 +1326,208 @@ config DEBUG_INFO Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. +config LOCKMETER + bool "Kernel lock metering" + depends on SMP + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + config DEBUG_SPINLOCK_SLEEP bool "Sleep-inside-spinlock checking" help If you say Y here, various routines which may sleep will become very noisy if they are called with a spinlock held. +config KGDB + bool "Include kgdb kernel debugger" + depends on DEBUG_KERNEL + help + If you say Y here, the system will be compiled with the debug + option (-g) and a debugging stub will be included in the + kernel. This stub communicates with gdb on another (host) + computer via a serial port. The host computer should have + access to the kernel binary file (vmlinux) and a serial port + that is connected to the target machine. Gdb can be made to + configure the serial port or you can use stty and setserial to + do this. See the 'target' command in gdb. This option also + configures in the ability to request a breakpoint early in the + boot process. To request the breakpoint just include 'kgdb' + as a boot option when booting the target machine. The system + will then break as soon as it looks at the boot options. This + option also installs a breakpoint in panic and sends any + kernel faults to the debugger. For more information see the + Documentation/i386/kgdb/kgdb.txt file. + +choice + depends on KGDB + prompt "Debug serial port BAUD" + default KGDB_115200BAUD + help + Gdb and the kernel stub need to agree on the baud rate to be + used. Some systems (x86 family at this writing) allow this to + be configured. + +config KGDB_9600BAUD + bool "9600" + +config KGDB_19200BAUD + bool "19200" + +config KGDB_38400BAUD + bool "38400" + +config KGDB_57600BAUD + bool "57600" + +config KGDB_115200BAUD + bool "115200" +endchoice + +config KGDB_PORT + hex "hex I/O port address of the debug serial port" + depends on KGDB + default 3f8 + help + Some systems (x86 family at this writing) allow the port + address to be configured. The number entered is assumed to be + hex, don't put 0x in front of it. The standard address are: + COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx + will tell you what you have. It is good to test the serial + connection with a live system before trying to debug. + +config KGDB_IRQ + int "IRQ of the debug serial port" + depends on KGDB + default 4 + help + This is the irq for the debug port. If everything is working + correctly and the kernel has interrupts on a control C to the + port should cause a break into the kernel debug stub. + +config DEBUG_INFO + bool + depends on KGDB + default y + +config KGDB_MORE + bool "Add any additional compile options" + depends on KGDB + default n + help + Saying yes here turns on the ability to enter additional + compile options. + + +config KGDB_OPTIONS + depends on KGDB_MORE + string "Additional compile arguments" + default "-O1" + help + This option allows you enter additional compile options for + the whole kernel compile. Each platform will have a default + that seems right for it. For example on PPC "-ggdb -O1", and + for i386 "-O1". Note that by configuring KGDB "-g" is already + turned on. In addition, on i386 platforms + "-fomit-frame-pointer" is deleted from the standard compile + options. + +config NO_KGDB_CPUS + int "Number of CPUs" + depends on KGDB && SMP + default NR_CPUS + help + + This option sets the number of cpus for kgdb ONLY. It is used + to prune some internal structures so they look "nice" when + displayed with gdb. This is to overcome possibly larger + numbers that may have been entered above. Enter the real + number to get nice clean kgdb_info displays. + +config KGDB_TS + bool "Enable kgdb time stamp macros?" + depends on KGDB + default n + help + Kgdb event macros allow you to instrument your code with calls + to the kgdb event recording function. The event log may be + examined with gdb at a break point. Turning on this + capability also allows you to choose how many events to + keep. Kgdb always keeps the lastest events. + +choice + depends on KGDB_TS + prompt "Max number of time stamps to save?" + default KGDB_TS_128 + +config KGDB_TS_64 + bool "64" + +config KGDB_TS_128 + bool "128" + +config KGDB_TS_256 + bool "256" + +config KGDB_TS_512 + bool "512" + +config KGDB_TS_1024 + bool "1024" + +endchoice + +config STACK_OVERFLOW_TEST + bool "Turn on kernel stack overflow testing?" + depends on KGDB + default n + help + This option enables code in the front line interrupt handlers + to check for kernel stack overflow on interrupts and system + calls. This is part of the kgdb code on x86 systems. + +config KGDB_CONSOLE + bool "Enable serial console thru kgdb port" + depends on KGDB + default n + help + This option enables the command line "console=kgdb" option. + When the system is booted with this option in the command line + all kernel printk output is sent to gdb (as well as to other + consoles). For this to work gdb must be connected. For this + reason, this command line option will generate a breakpoint if + gdb has not yet connected. After the gdb continue command is + given all pent up console output will be printed by gdb on the + host machine. Neither this option, nor KGDB require the + serial driver to be configured. + +config KGDB_SYSRQ + bool "Turn on SysRq 'G' command to do a break?" + depends on KGDB + default y + help + This option includes an option in the SysRq code that allows + you to enter SysRq G which generates a breakpoint to the KGDB + stub. This will work if the keyboard is alive and can + interrupt the system. Because of constraints on when the + serial port interrupt can be enabled, this code may allow you + to interrupt the system before the serial port control C is + available. Just say yes here. + config FRAME_POINTER bool "Compile the kernel with frame pointers" + default KGDB help If you say Y here the resulting kernel image will be slightly larger and slower, but it will give very useful debugging information. If you don't debug the kernel, you can say N, but we may not be able to solve problems without frame pointers. +config MAGIC_SYSRQ + bool + depends on KGDB_SYSRQ + default y + config X86_EXTRA_IRQS bool depends on X86_LOCAL_APIC || X86_VOYAGER --- linux-2.6.2-rc1/arch/i386/kernel/acpi/boot.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/acpi/boot.c 2004-01-22 00:01:18.000000000 -0800 @@ -32,6 +32,7 @@ #include #include #include +#include #if defined (CONFIG_X86_LOCAL_APIC) #include @@ -41,9 +42,8 @@ #define PREFIX "ACPI: " -extern int acpi_disabled; -extern int acpi_irq; -extern int acpi_ht; +int acpi_noirq __initdata = 0; /* skip ACPI IRQ initialization */ +int acpi_ht __initdata = 1; /* enable HT */ int acpi_lapic = 0; int acpi_ioapic = 0; @@ -250,32 +250,76 @@ acpi_parse_nmi_src ( #ifdef CONFIG_ACPI_BUS /* - * Set specified PIC IRQ to level triggered mode. + * "acpi_pic_sci=level" (current default) + * programs the PIC-mode SCI to Level Trigger. + * (NO-OP if the BIOS set Level Trigger already) + * + * If a PIC-mode SCI is not recogznied or gives spurious IRQ7's + * it may require Edge Trigger -- use "acpi_pic_sci=edge" + * (NO-OP if the BIOS set Edge Trigger already) * * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) - * - * As the BIOS should have done this for us, - * print a warning if the IRQ wasn't already set to level. */ -void acpi_pic_set_level_irq(unsigned int irq) +static int __initdata acpi_pic_sci_trigger; /* 0: level, 1: edge */ + +void __init +acpi_pic_sci_set_trigger(unsigned int irq) { unsigned char mask = 1 << (irq & 7); unsigned int port = 0x4d0 + (irq >> 3); unsigned char val = inb(port); + + printk(PREFIX "IRQ%d SCI:", irq); if (!(val & mask)) { - printk(KERN_WARNING PREFIX "IRQ %d was Edge Triggered, " - "setting to Level Triggerd\n", irq); - outb(val | mask, port); + printk(" Edge"); + + if (!acpi_pic_sci_trigger) { + printk(" set to Level"); + outb(val | mask, port); + } + } else { + printk(" Level"); + + if (acpi_pic_sci_trigger) { + printk(" set to Edge"); + outb(val | mask, port); + } } + printk(" Trigger.\n"); } -#endif /* CONFIG_ACPI_BUS */ +int __init +acpi_pic_sci_setup(char *str) +{ + while (str && *str) { + if (strncmp(str, "level", 5) == 0) + acpi_pic_sci_trigger = 0; /* force level trigger */ + if (strncmp(str, "edge", 4) == 0) + acpi_pic_sci_trigger = 1; /* force edge trigger */ + str = strchr(str, ','); + if (str) + str += strspn(str, ", \t"); + } + return 1; +} + +__setup("acpi_pic_sci=", acpi_pic_sci_setup); + +#endif /* CONFIG_ACPI_BUS */ +#ifdef CONFIG_X86_IO_APIC +int acpi_irq_to_vector(u32 irq) +{ + if (use_pci_vector() && !platform_legacy_irq(irq)) + irq = IO_APIC_VECTOR(irq); + return irq; +} +#endif static unsigned long __init acpi_scan_rsdp ( @@ -290,7 +334,7 @@ acpi_scan_rsdp ( * RSDP signature. */ for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) + if (strncmp((char *) __va(start + offset), "RSD PTR ", sig_len)) continue; return (start + offset); } @@ -327,6 +371,37 @@ static int __init acpi_parse_hpet(unsign } #endif +/* detect the location of the ACPI PM Timer */ +#ifdef CONFIG_X86_PM_TIMER +extern u32 pmtmr_ioport; + +static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) +{ + struct fadt_descriptor_rev2 *fadt =0; + + fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size); + if(!fadt) { + printk(KERN_WARNING PREFIX "Unable to map FADT\n"); + return 0; + } + + if (fadt->revision >= FADT2_REVISION_ID) { + /* FADT rev. 2 */ + if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO) + return 0; + + pmtmr_ioport = fadt->xpm_tmr_blk.address; + } else { + /* FADT rev. 1 */ + pmtmr_ioport = fadt->V1_pm_tmr_blk; + } + if (pmtmr_ioport) + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport); + return 0; +} +#endif + + unsigned long __init acpi_find_rsdp (void) { @@ -387,8 +462,10 @@ acpi_boot_init (void) * Initialize the ACPI boot-time table parser. */ result = acpi_table_init(); - if (result) + if (result) { + acpi_disabled = 1; return result; + } result = acpi_blacklisted(); if (result) { @@ -397,6 +474,10 @@ acpi_boot_init (void) return result; } +#ifdef CONFIG_X86_PM_TIMER + acpi_table_parse(ACPI_FADT, acpi_parse_fadt); +#endif + #ifdef CONFIG_X86_LOCAL_APIC /* @@ -469,7 +550,7 @@ acpi_boot_init (void) * If MPS is present, it will handle them, * otherwise the system will stay in PIC mode */ - if (acpi_disabled || !acpi_irq) { + if (acpi_disabled || acpi_noirq) { return 1; } @@ -511,6 +592,8 @@ acpi_boot_init (void) acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; + acpi_irq_balance_set(NULL); + acpi_ioapic = 1; #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */ --- linux-2.6.2-rc1/arch/i386/kernel/asm-offsets.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/asm-offsets.c 2004-01-22 00:01:18.000000000 -0800 @@ -4,9 +4,11 @@ * to extract and format the required data. */ +#include #include #include #include "sigframe.h" +#include #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -28,4 +30,17 @@ void foo(void) DEFINE(RT_SIGFRAME_sigcontext, offsetof (struct rt_sigframe, uc.uc_mcontext)); + DEFINE(TI_task, offsetof (struct thread_info, task)); + DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain)); + DEFINE(TI_flags, offsetof (struct thread_info, flags)); + DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count)); + DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit)); + DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack)); + DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack)); + DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd)); + + DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr, __fix_to_virt(FIX_ENTRY_TRAMPOLINE_0)); + DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL)); + DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(task_thread_db7, offsetof (struct task_struct, thread.debugreg[7])); } --- linux-2.6.2-rc1/arch/i386/kernel/cpu/common.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/cpu/common.c 2004-01-22 00:01:18.000000000 -0800 @@ -514,12 +514,16 @@ void __init cpu_init (void) set_tss_desc(cpu,t); cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); - load_LDT(&init_mm.context); + if (cpu) + load_LDT(&init_mm.context); /* Set up doublefault TSS pointer in the GDT */ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff; + if (cpu) + trap_init_virtual_GDT(); + /* Clear %fs and %gs. */ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); --- linux-2.6.2-rc1/arch/i386/kernel/cpu/cpufreq/Kconfig 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/i386/kernel/cpu/cpufreq/Kconfig 2004-01-21 23:30:03.000000000 -0800 @@ -54,7 +54,7 @@ config X86_ACPI_CPUFREQ_PROC_INTF config ELAN_CPUFREQ tristate "AMD Elan" - depends on CPU_FREQ_TABLE && MELAN + depends on CPU_FREQ_TABLE && X86_ELAN ---help--- This adds the CPUFreq driver for AMD Elan SC400 and SC410 processors. --- linux-2.6.2-rc1/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-01-21 23:27:16.000000000 -0800 @@ -57,8 +57,7 @@ static int cpufreq_p4_setdc(unsigned int u32 l, h; cpumask_t cpus_allowed, affected_cpu_map; struct cpufreq_freqs freqs; - int hyperthreading = 0; - int sibling = 0; + int j; if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) @@ -68,13 +67,10 @@ static int cpufreq_p4_setdc(unsigned int cpus_allowed = current->cpus_allowed; /* only run on CPU to be set, or on its sibling */ - affected_cpu_map = cpumask_of_cpu(cpu); -#ifdef CONFIG_X86_HT - hyperthreading = ((cpu_has_ht) && (smp_num_siblings == 2)); - if (hyperthreading) { - sibling = cpu_sibling_map[cpu]; - cpu_set(sibling, affected_cpu_map); - } +#ifdef CONFIG_SMP + affected_cpu_map = cpu_sibling_map[cpu]; +#else + affected_cpu_map = cpumask_of_cpu(cpu); #endif set_cpus_allowed(current, affected_cpu_map); BUG_ON(!cpu_isset(smp_processor_id(), affected_cpu_map)); @@ -97,11 +93,11 @@ static int cpufreq_p4_setdc(unsigned int /* notifiers */ freqs.old = stock_freq * l / 8; freqs.new = stock_freq * newstate / 8; - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - if (hyperthreading) { - freqs.cpu = sibling; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + for_each_cpu(j) { + if (cpu_isset(j, affected_cpu_map)) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } } rdmsr(MSR_IA32_THERM_STATUS, l, h); @@ -132,10 +128,11 @@ static int cpufreq_p4_setdc(unsigned int set_cpus_allowed(current, cpus_allowed); /* notifiers */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - if (hyperthreading) { - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + for_each_cpu(j) { + if (cpu_isset(j, affected_cpu_map)) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } } return 0; --- linux-2.6.2-rc1/arch/i386/kernel/cpuid.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/i386/kernel/cpuid.c 2004-01-21 23:30:34.000000000 -0800 @@ -1,4 +1,3 @@ -#ident "$Id$" /* ----------------------------------------------------------------------- * * * Copyright 2000 H. Peter Anvin - All Rights Reserved --- linux-2.6.2-rc1/arch/i386/kernel/cpu/intel.c 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/i386/kernel/cpu/intel.c 2004-01-22 00:01:18.000000000 -0800 @@ -10,6 +10,7 @@ #include #include #include +#include #include "cpu.h" @@ -19,8 +20,6 @@ #include #endif -extern int trap_init_f00f_bug(void); - #ifdef CONFIG_X86_INTEL_USERCOPY /* * Alignment at which movsl is preferred for bulk memory copies. @@ -165,7 +164,7 @@ static void __init init_intel(struct cpu c->f00f_bug = 1; if ( !f00f_workaround_enabled ) { - trap_init_f00f_bug(); + trap_init_virtual_IDT(); printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); f00f_workaround_enabled = 1; } @@ -248,6 +247,12 @@ static void __init init_intel(struct cpu /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) clear_bit(X86_FEATURE_SEP, c->x86_capability); + /* + * FIXME: SEP is disabled for 4G/4G for now: + */ +#ifdef CONFIG_X86_HIGH_ENTRY + clear_bit(X86_FEATURE_SEP, c->x86_capability); +#endif /* Names for the Pentium II/Celeron processors detectable only by also checking the cache size. --- linux-2.6.2-rc1/arch/i386/kernel/cpu/mcheck/non-fatal.c 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/i386/kernel/cpu/mcheck/non-fatal.c 2004-01-21 20:11:14.000000000 -0800 @@ -74,6 +74,16 @@ static void mce_timerfunc (unsigned long static int __init init_nonfatal_mce_checker(void) { + struct cpuinfo_x86 *c = &boot_cpu_data; + + /* Check for MCE support */ + if (!cpu_has(c, X86_FEATURE_MCE)) + return -ENODEV; + + /* Check for PPro style MCA */ + if (!cpu_has(c, X86_FEATURE_MCA)) + return -ENODEV; + /* Some Athlons misbehave when we frob bank 0 */ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 == 6) --- linux-2.6.2-rc1/arch/i386/kernel/dmi_scan.c 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/i386/kernel/dmi_scan.c 2004-01-21 23:27:03.000000000 -0800 @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,7 @@ EXPORT_SYMBOL(dmi_broken); int is_sony_vaio_laptop; int is_unsafe_smbus; +int es7000_plat = 0; struct dmi_header { @@ -496,6 +498,7 @@ static __init int print_if_true(struct d } +#ifdef CONFIG_ACPI_BOOT extern int acpi_disabled, acpi_force; static __init __attribute__((unused)) int acpi_disable(struct dmi_blacklist *d) @@ -510,8 +513,6 @@ static __init __attribute__((unused)) in return 0; } - -#ifdef CONFIG_ACPI_BOOT extern int acpi_ht; /* @@ -534,10 +535,8 @@ static __init __attribute__((unused)) in #ifdef CONFIG_ACPI_PCI static __init int disable_acpi_pci(struct dmi_blacklist *d) { - extern __init void pci_disable_acpi(void) ; - printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", d->ident); - pci_disable_acpi(); + acpi_noirq_set(); return 0; } #endif @@ -1003,6 +1002,7 @@ static __init void dmi_check_blacklist(v printk(KERN_NOTICE "ACPI disabled because your bios is from %s and too old\n", s); printk(KERN_NOTICE "You can enable it with acpi=force\n"); acpi_disabled = 1; + acpi_ht = 0; } } } --- linux-2.6.2-rc1/arch/i386/kernel/doublefault.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/doublefault.c 2004-01-22 00:01:18.000000000 -0800 @@ -7,12 +7,13 @@ #include #include #include +#include #define DOUBLEFAULT_STACKSIZE (1024) static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) -#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000) +#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START)) static void doublefault_fn(void) { @@ -38,8 +39,8 @@ static void doublefault_fn(void) printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", t->eax, t->ebx, t->ecx, t->edx); - printk("esi = %08lx, edi = %08lx\n", - t->esi, t->edi); + printk("esi = %08lx, edi = %08lx, ebp = %08lx\n", + t->esi, t->edi, t->ebp); } } --- linux-2.6.2-rc1/arch/i386/kernel/edd.c 2003-10-17 15:58:03.000000000 -0700 +++ 25/arch/i386/kernel/edd.c 2004-01-21 23:30:45.000000000 -0800 @@ -2,6 +2,7 @@ * linux/arch/i386/kernel/edd.c * Copyright (C) 2002, 2003 Dell Inc. * by Matt Domsch + * disk80 signature by Matt Domsch, Andrew Wilks, and Sandeep K. Shandilya * * BIOS Enhanced Disk Drive Services (EDD) * conformant to T13 Committee www.t13.org @@ -59,7 +60,7 @@ MODULE_AUTHOR("Matt Domsch device == 0x80; +} + static EDD_DEVICE_ATTR(raw_data, 0444, edd_show_raw_data, NULL); static EDD_DEVICE_ATTR(version, 0444, edd_show_version, NULL); static EDD_DEVICE_ATTR(extensions, 0444, edd_show_extensions, NULL); @@ -443,6 +461,7 @@ static EDD_DEVICE_ATTR(default_sectors_p edd_has_default_sectors_per_track); static EDD_DEVICE_ATTR(interface, 0444, edd_show_interface, edd_has_edd30); static EDD_DEVICE_ATTR(host_bus, 0444, edd_show_host_bus, edd_has_edd30); +static EDD_DEVICE_ATTR(mbr_signature, 0444, edd_show_disk80_sig, edd_has_disk80_sig); /* These are default attributes that are added for every edd @@ -464,6 +483,7 @@ static struct edd_attribute * edd_attrs[ &edd_attr_default_sectors_per_track, &edd_attr_interface, &edd_attr_host_bus, + &edd_attr_mbr_signature, NULL, }; --- linux-2.6.2-rc1/arch/i386/kernel/entry.S 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/i386/kernel/entry.S 2004-01-22 00:01:18.000000000 -0800 @@ -43,11 +43,25 @@ #include #include #include +#include #include #include +#include #include #include #include "irq_vectors.h" + /* We do not recover from a stack overflow, but at least + * we know it happened and should be able to track it down. + */ +#ifdef CONFIG_STACK_OVERFLOW_TEST +#define STACK_OVERFLOW_TEST \ + testl $7680,%esp; \ + jnz 10f; \ + call stack_overflow; \ +10: +#else +#define STACK_OVERFLOW_TEST +#endif #define nr_syscalls ((syscall_table_size)/4) @@ -87,7 +101,102 @@ TSS_ESP0_OFFSET = (4 - 0x200) #define resume_kernel restore_all #endif -#define SAVE_ALL \ +#ifdef CONFIG_X86_HIGH_ENTRY + +#ifdef CONFIG_X86_SWITCH_PAGETABLES + +#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +/* + * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu, + * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is + * left stale, so we must check whether to repeat the real stack calculation. + */ +#define repeat_if_esp_changed \ + xorl %esp, %ebp; \ + testl $0xffffe000, %ebp; \ + jnz 0b +#else +#define repeat_if_esp_changed +#endif + +/* clobbers ebx, edx and ebp */ + +#define __SWITCH_KERNELSPACE \ + cmpl $0xff000000, %esp; \ + jb 1f; \ + \ + /* \ + * switch pagetables and load the real stack, \ + * keep the stack offset: \ + */ \ + \ + movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \ + \ + /* GET_THREAD_INFO(%ebp) intermixed */ \ +0: \ + movl %esp, %ebp; \ + movl %esp, %ebx; \ + andl $0xffffe000, %ebp; \ + andl $0x00001fff, %ebx; \ + orl TI_real_stack(%ebp), %ebx; \ + repeat_if_esp_changed; \ + \ + movl %edx, %cr3; \ + movl %ebx, %esp; \ +1: + +#endif + + +#define __SWITCH_USERSPACE \ + /* interrupted any of the user return paths? */ \ + \ + movl EIP(%esp), %eax; \ + \ + cmpl $int80_ret_start_marker, %eax; \ + jb 33f; /* nope - continue with sysexit check */\ + cmpl $int80_ret_end_marker, %eax; \ + jb 22f; /* yes - switch to virtual stack */ \ +33: \ + cmpl $sysexit_ret_start_marker, %eax; \ + jb 44f; /* nope - continue with user check */ \ + cmpl $sysexit_ret_end_marker, %eax; \ + jb 22f; /* yes - switch to virtual stack */ \ + /* return to userspace? */ \ +44: \ + movl EFLAGS(%esp),%ecx; \ + movb CS(%esp),%cl; \ + testl $(VM_MASK | 3),%ecx; \ + jz 2f; \ +22: \ + /* \ + * switch to the virtual stack, then switch to \ + * the userspace pagetables. \ + */ \ + \ + GET_THREAD_INFO(%ebp); \ + movl TI_virtual_stack(%ebp), %edx; \ + movl TI_user_pgd(%ebp), %ecx; \ + \ + movl %esp, %ebx; \ + andl $0x1fff, %ebx; \ + orl %ebx, %edx; \ +int80_ret_start_marker: \ + movl %edx, %esp; \ + movl %ecx, %cr3; \ + \ + __RESTORE_ALL; \ +int80_ret_end_marker: \ +2: + +#else /* !CONFIG_X86_HIGH_ENTRY */ + +#define __SWITCH_KERNELSPACE +#define __SWITCH_USERSPACE + +#endif + +#define __SAVE_ALL \ cld; \ pushl %es; \ pushl %ds; \ @@ -102,7 +211,7 @@ TSS_ESP0_OFFSET = (4 - 0x200) movl %edx, %ds; \ movl %edx, %es; -#define RESTORE_INT_REGS \ +#define __RESTORE_INT_REGS \ popl %ebx; \ popl %ecx; \ popl %edx; \ @@ -111,29 +220,28 @@ TSS_ESP0_OFFSET = (4 - 0x200) popl %ebp; \ popl %eax -#define RESTORE_REGS \ - RESTORE_INT_REGS; \ -1: popl %ds; \ -2: popl %es; \ +#define __RESTORE_REGS \ + __RESTORE_INT_REGS; \ +111: popl %ds; \ +222: popl %es; \ .section .fixup,"ax"; \ -3: movl $0,(%esp); \ - jmp 1b; \ -4: movl $0,(%esp); \ - jmp 2b; \ +444: movl $0,(%esp); \ + jmp 111b; \ +555: movl $0,(%esp); \ + jmp 222b; \ .previous; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,3b; \ - .long 2b,4b; \ + .long 111b,444b;\ + .long 222b,555b;\ .previous - -#define RESTORE_ALL \ - RESTORE_REGS \ +#define __RESTORE_ALL \ + __RESTORE_REGS \ addl $4, %esp; \ -1: iret; \ +333: iret; \ .section .fixup,"ax"; \ -2: sti; \ +666: sti; \ movl $(__USER_DS), %edx; \ movl %edx, %ds; \ movl %edx, %es; \ @@ -142,10 +250,19 @@ TSS_ESP0_OFFSET = (4 - 0x200) .previous; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,2b; \ + .long 333b,666b;\ .previous +#define SAVE_ALL \ + __SAVE_ALL; \ + __SWITCH_KERNELSPACE; \ + STACK_OVERFLOW_TEST; + +#define RESTORE_ALL \ + __SWITCH_USERSPACE; \ + __RESTORE_ALL; +.section .entry.text,"ax" ENTRY(lcall7) pushfl # We get a different stack layout with call @@ -163,7 +280,7 @@ do_lcall: movl %edx,EIP(%ebp) # Now we move them to their "normal" places movl %ecx,CS(%ebp) # andl $-8192, %ebp # GET_THREAD_INFO - movl TI_EXEC_DOMAIN(%ebp), %edx # Get the execution domain + movl TI_exec_domain(%ebp), %edx # Get the execution domain call *4(%edx) # Call the lcall7 handler for the domain addl $4, %esp popl %eax @@ -208,7 +325,7 @@ ENTRY(resume_userspace) cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done on # int/exception return? jne work_pending @@ -216,18 +333,18 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - cmpl $0,TI_PRE_COUNT(%ebp) # non-zero preempt_count ? + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_all need_resched: - movl TI_FLAGS(%ebp), %ecx # need_resched set ? + movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl jz restore_all testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all - movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebp) + movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp) sti call schedule - movl $0,TI_PRE_COUNT(%ebp) + movl $0,TI_preempt_count(%ebp) cli jmp need_resched #endif @@ -246,37 +363,50 @@ sysenter_past_esp: pushl $(__USER_CS) pushl $SYSENTER_RETURN -/* - * Load the potential sixth argument from user stack. - * Careful about security. - */ - cmpl $__PAGE_OFFSET-3,%ebp - jae syscall_fault -1: movl (%ebp),%ebp -.section __ex_table,"a" - .align 4 - .long 1b,syscall_fault -.previous - pushl %eax SAVE_ALL GET_THREAD_INFO(%ebp) cmpl $(nr_syscalls), %eax jae syscall_badsys - testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) + testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) jnz syscall_trace_entry call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) cli - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work + +#ifdef CONFIG_X86_SWITCH_PAGETABLES + + GET_THREAD_INFO(%ebp) + movl TI_virtual_stack(%ebp), %edx + movl TI_user_pgd(%ebp), %ecx + movl %esp, %ebx + andl $0x1fff, %ebx + orl %ebx, %edx +sysexit_ret_start_marker: + movl %edx, %esp + movl %ecx, %cr3 +#endif + /* + * only ebx is not restored by the userspace sysenter vsyscall + * code, it assumes it to be callee-saved. + */ + movl EBX(%esp), %ebx + /* if something modifies registers it must also disable sysexit */ + movl EIP(%esp), %edx movl OLDESP(%esp), %ecx + sti sysexit +#ifdef CONFIG_X86_SWITCH_PAGETABLES +sysexit_ret_end_marker: + nop +#endif # system call handler stub @@ -287,7 +417,7 @@ ENTRY(system_call) cmpl $(nr_syscalls), %eax jae syscall_badsys # system call tracing in operation - testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) + testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) jnz syscall_trace_entry syscall_call: call *sys_call_table(,%eax,4) @@ -296,10 +426,23 @@ syscall_exit: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work restore_all: +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS + movl EFLAGS(%esp), %eax # mix EFLAGS and CS + movb CS(%esp), %al + testl $(VM_MASK | 3), %eax + jz resume_kernelX # returning to kernel or vm86-space + + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? + jz resume_kernelX + + int $3 + +resume_kernelX: +#endif RESTORE_ALL # perform work that needs to be done immediately before resumption @@ -312,7 +455,7 @@ work_resched: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? jz restore_all @@ -327,6 +470,22 @@ work_notifysig: # deal with pending s # vm86-space xorl %edx, %edx call do_notify_resume + +#if CONFIG_X86_HIGH_ENTRY + /* + * Reload db7 if necessary: + */ + movl TI_flags(%ebp), %ecx + testb $_TIF_DB7, %cl + jnz work_db7 + + jmp restore_all + +work_db7: + movl TI_task(%ebp), %edx; + movl task_thread_db7(%edx), %edx; + movl %edx, %db7; +#endif jmp restore_all ALIGN @@ -382,7 +541,7 @@ syscall_badsys: */ .data ENTRY(interrupt) -.text +.previous vector=0 ENTRY(irq_entries_start) @@ -392,7 +551,7 @@ ENTRY(irq_entries_start) jmp common_interrupt .data .long 1b -.text +.previous vector=vector+1 .endr @@ -433,12 +592,17 @@ error_code: movl ES(%esp), %edi # get the function address movl %eax, ORIG_EAX(%esp) movl %ecx, ES(%esp) - movl %esp, %edx pushl %esi # push the error code - pushl %edx # push the pt_regs pointer movl $(__USER_DS), %edx movl %edx, %ds movl %edx, %es + +/* clobbers edx, ebx and ebp */ + __SWITCH_KERNELSPACE + + leal 4(%esp), %edx # prepare pt_regs + pushl %edx # push pt_regs + call *%edi addl $8, %esp jmp ret_from_exception @@ -529,7 +693,7 @@ nmi_stack_correct: pushl %edx call do_nmi addl $8, %esp - RESTORE_ALL + jmp restore_all nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) @@ -606,6 +770,8 @@ ENTRY(spurious_interrupt_bug) pushl $do_spurious_interrupt_bug jmp error_code +.previous + .data ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/entry_trampoline.c 2004-01-22 00:01:18.000000000 -0800 @@ -0,0 +1,75 @@ +/* + * linux/arch/i386/kernel/entry_trampoline.c + * + * (C) Copyright 2003 Ingo Molnar + * + * This file contains the needed support code for 4GB userspace + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text; + +void __init init_entry_mappings(void) +{ +#ifdef CONFIG_X86_HIGH_ENTRY + void *tramp; + + /* + * We need a high IDT and GDT for the 4G/4G split: + */ + trap_init_virtual_IDT(); + + __set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL); + __set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL); + tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0); + + printk("mapped 4G/4G trampoline to %p.\n", tramp); + BUG_ON((void *)&__start___entry_text != tramp); + /* + * Virtual kernel stack: + */ + BUG_ON(__kmap_atomic_vaddr(KM_VSTACK0) & 8191); + BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE); + BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE); + + /* + * set up the initial thread's virtual stack related + * fields: + */ + current->thread.stack_page0 = virt_to_page((char *)current->thread_info); + current->thread.stack_page1 = virt_to_page((char *)current->thread_info + PAGE_SIZE); + current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK0); + + __kunmap_atomic_type(KM_VSTACK0); + __kunmap_atomic_type(KM_VSTACK1); + __kmap_atomic(current->thread.stack_page0, KM_VSTACK0); + __kmap_atomic(current->thread.stack_page1, KM_VSTACK1); + +#endif + printk("current: %p\n", current); + printk("current->thread_info: %p\n", current->thread_info); + current->thread_info->real_stack = (void *)current->thread_info; + current->thread_info->user_pgd = NULL; + current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE; +} + + + +void __init entry_trampoline_setup(void) +{ + /* + * old IRQ entries set up by the boot code will still hang + * around - they are a sign of hw trouble anyway, now they'll + * produce a double fault message. + */ + trap_init_virtual_GDT(); +} --- linux-2.6.2-rc1/arch/i386/kernel/head.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/head.S 2004-01-22 00:01:18.000000000 -0800 @@ -16,6 +16,7 @@ #include #include #include +#include #define OLD_CL_MAGIC_ADDR 0x90020 #define OLD_CL_MAGIC 0xA33F @@ -330,7 +331,7 @@ ENTRY(stack_start) /* This is the default interrupt "handler" :-) */ int_msg: - .asciz "Unknown interrupt\n" + .asciz "Unknown interrupt or fault at EIP %p %p %p\n" ALIGN ignore_int: cld @@ -342,9 +343,17 @@ ignore_int: movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es + pushl 16(%esp) + pushl 24(%esp) + pushl 32(%esp) + pushl 40(%esp) pushl $int_msg call printk popl %eax + popl %eax + popl %eax + popl %eax + popl %eax popl %ds popl %es popl %edx @@ -377,23 +386,27 @@ cpu_gdt_descr: .fill NR_CPUS-1,8,0 # space for the other GDT descriptors /* - * This is initialized to create an identity-mapping at 0-8M (for bootup - * purposes) and another mapping of the 0-8M area at virtual address + * This is initialized to create an identity-mapping at 0-16M (for bootup + * purposes) and another mapping of the 0-16M area at virtual address * PAGE_OFFSET. */ .org 0x1000 ENTRY(swapper_pg_dir) .long 0x00102007 .long 0x00103007 - .fill BOOT_USER_PGD_PTRS-2,4,0 - /* default: 766 entries */ + .long 0x00104007 + .long 0x00105007 + .fill BOOT_USER_PGD_PTRS-4,4,0 + /* default: 764 entries */ .long 0x00102007 .long 0x00103007 - /* default: 254 entries */ - .fill BOOT_KERNEL_PGD_PTRS-2,4,0 + .long 0x00104007 + .long 0x00105007 + /* default: 252 entries */ + .fill BOOT_KERNEL_PGD_PTRS-4,4,0 /* - * The page tables are initialized to only 8MB here - the final page + * The page tables are initialized to only 16MB here - the final page * tables are set up later depending on memory size. */ .org 0x2000 @@ -402,15 +415,21 @@ ENTRY(pg0) .org 0x3000 ENTRY(pg1) +.org 0x4000 +ENTRY(pg2) + +.org 0x5000 +ENTRY(pg3) + /* * empty_zero_page must immediately follow the page tables ! (The * initialization loop counts until empty_zero_page) */ -.org 0x4000 +.org 0x6000 ENTRY(empty_zero_page) -.org 0x5000 +.org 0x7000 /* * Real beginning of normal "text" segment @@ -419,12 +438,12 @@ ENTRY(stext) ENTRY(_stext) /* - * This starts the data section. Note that the above is all - * in the text section because it has alignment requirements - * that we cannot fulfill any other way. + * This starts the data section. */ .data +.align PAGE_SIZE_asm + /* * The Global Descriptor Table contains 28 quadwords, per-CPU. */ @@ -439,7 +458,9 @@ ENTRY(boot_gdt_table) .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ #endif - .align L1_CACHE_BYTES + +.align PAGE_SIZE_asm + ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ --- linux-2.6.2-rc1/arch/i386/kernel/i386_ksyms.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/i386_ksyms.c 2004-01-22 00:01:18.000000000 -0800 @@ -32,7 +32,6 @@ #include #include #include -#include #include extern void dump_thread(struct pt_regs *, struct user *); @@ -98,7 +97,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter EXPORT_SYMBOL_NOVERS(__down_failed_trylock); EXPORT_SYMBOL_NOVERS(__up_wakeup); /* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy_generic); /* Delay loops */ EXPORT_SYMBOL(__ndelay); EXPORT_SYMBOL(__udelay); @@ -112,13 +110,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); +#if !defined(CONFIG_X86_UACCESS_INDIRECT) EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__direct_strncpy_from_user); EXPORT_SYMBOL(clear_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_from_user_ll); EXPORT_SYMBOL(__copy_to_user_ll); EXPORT_SYMBOL(strnlen_user); +#else /* CONFIG_X86_UACCESS_INDIRECT */ +EXPORT_SYMBOL(direct_csum_partial_copy_generic); +#endif EXPORT_SYMBOL(dma_alloc_coherent); EXPORT_SYMBOL(dma_free_coherent); @@ -203,11 +205,6 @@ EXPORT_SYMBOL(kunmap_atomic); EXPORT_SYMBOL(kmap_atomic_to_page); #endif -#ifdef CONFIG_EDD_MODULE -EXPORT_SYMBOL(edd); -EXPORT_SYMBOL(eddnr); -#endif - #if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) EXPORT_SYMBOL(ist_info); #endif --- linux-2.6.2-rc1/arch/i386/kernel/i387.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/i387.c 2004-01-22 00:01:18.000000000 -0800 @@ -218,6 +218,7 @@ void set_fpu_mxcsr( struct task_struct * static int convert_fxsr_to_user( struct _fpstate __user *buf, struct i387_fxsave_struct *fxsave ) { + struct _fpreg tmp[8]; /* 80 bytes scratch area */ unsigned long env[7]; struct _fpreg __user *to; struct _fpxreg *from; @@ -234,23 +235,25 @@ static int convert_fxsr_to_user( struct if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) return 1; - to = &buf->_st[0]; + to = tmp; from = (struct _fpxreg *) &fxsave->st_space[0]; for ( i = 0 ; i < 8 ; i++, to++, from++ ) { unsigned long *t = (unsigned long *)to; unsigned long *f = (unsigned long *)from; - if (__put_user(*f, t) || - __put_user(*(f + 1), t + 1) || - __put_user(from->exponent, &to->exponent)) - return 1; + *t = *f; + *(t + 1) = *(f+1); + to->exponent = from->exponent; } + if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8]))) + return 1; return 0; } static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, struct _fpstate __user *buf ) { + struct _fpreg tmp[8]; /* 80 bytes scratch area */ unsigned long env[7]; struct _fpxreg *to; struct _fpreg __user *from; @@ -258,6 +261,8 @@ static int convert_fxsr_from_user( struc if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) return 1; + if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8]))) + return 1; fxsave->cwd = (unsigned short)(env[0] & 0xffff); fxsave->swd = (unsigned short)(env[1] & 0xffff); @@ -269,15 +274,14 @@ static int convert_fxsr_from_user( struc fxsave->fos = env[6]; to = (struct _fpxreg *) &fxsave->st_space[0]; - from = &buf->_st[0]; + from = tmp; for ( i = 0 ; i < 8 ; i++, to++, from++ ) { unsigned long *t = (unsigned long *)to; unsigned long *f = (unsigned long *)from; - if (__get_user(*t, f) || - __get_user(*(t + 1), f + 1) || - __get_user(to->exponent, &from->exponent)) - return 1; + *t = *f; + *(t + 1) = *(f + 1); + to->exponent = from->exponent; } return 0; } @@ -451,15 +455,18 @@ int get_fpxregs( struct user_fxsr_struct int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct __user *buf ) { + int ret = 0; + if ( cpu_has_fxsr ) { - __copy_from_user( &tsk->thread.i387.fxsave, buf, - sizeof(struct user_fxsr_struct) ); + if (__copy_from_user( &tsk->thread.i387.fxsave, buf, + sizeof(struct user_fxsr_struct) )) + ret = -EFAULT; /* mxcsr bit 6 and 31-16 must be zero for security reasons */ tsk->thread.i387.fxsave.mxcsr &= 0xffbf; - return 0; } else { - return -EIO; + ret = -EIO; } + return ret; } /* --- linux-2.6.2-rc1/arch/i386/kernel/init_task.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/init_task.c 2004-01-22 00:01:18.000000000 -0800 @@ -26,7 +26,7 @@ EXPORT_SYMBOL(init_mm); */ union thread_union init_thread_union __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; + { INIT_THREAD_INFO(init_task, init_thread_union) }; /* * Initial task structure. @@ -44,5 +44,5 @@ EXPORT_SYMBOL(init_task); * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; +struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS }; --- linux-2.6.2-rc1/arch/i386/kernel/io_apic.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/io_apic.c 2004-01-21 23:27:15.000000000 -0800 @@ -317,8 +317,7 @@ struct irq_cpu_info { #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) -#define CPU_TO_PACKAGEINDEX(i) \ - ((physical_balance && i > cpu_sibling_map[i]) ? cpu_sibling_map[i] : i) +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) @@ -401,6 +400,7 @@ static void do_irq_balance(void) unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); unsigned long move_this_load = 0; int max_loaded = 0, min_loaded = 0; + int load; unsigned long useful_load_threshold = balanced_irq_interval + 10; int selected_irq; int tmp_loaded, first_attempt = 1; @@ -452,7 +452,7 @@ static void do_irq_balance(void) for (i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) continue; - if (physical_balance && i > cpu_sibling_map[i]) + if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (min_cpu_irq > CPU_IRQ(i)) { min_cpu_irq = CPU_IRQ(i); @@ -471,7 +471,7 @@ tryanothercpu: for (i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) continue; - if (physical_balance && i > cpu_sibling_map[i]) + if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (max_cpu_irq <= CPU_IRQ(i)) continue; @@ -551,9 +551,14 @@ tryanotherirq: * We seek the least loaded sibling by making the comparison * (A+B)/2 vs B */ - if (physical_balance && (CPU_IRQ(min_loaded) >> 1) > - CPU_IRQ(cpu_sibling_map[min_loaded])) - min_loaded = cpu_sibling_map[min_loaded]; + load = CPU_IRQ(min_loaded) >> 1; + for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { + if (load > CPU_IRQ(j)) { + /* This won't change cpu_sibling_map[min_loaded] */ + load = CPU_IRQ(j); + min_loaded = j; + } + } cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]); target_cpu_mask = cpumask_of_cpu(min_loaded); @@ -1650,10 +1655,6 @@ static void __init setup_ioapic_ids_from unsigned char old_id; unsigned long flags; - if (acpi_ioapic) - /* This gets done during IOAPIC enumeration for ACPI. */ - return; - /* * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. @@ -2286,12 +2287,14 @@ void __init setup_IO_APIC(void) /* * Set up IO-APIC IRQ routing. */ - setup_ioapic_ids_from_mpc(); + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - print_IO_APIC(); + if (!acpi_ioapic) + print_IO_APIC(); } /* --- linux-2.6.2-rc1/arch/i386/kernel/irq.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/irq.c 2004-01-21 23:26:59.000000000 -0800 @@ -508,6 +508,8 @@ out: irq_exit(); + kgdb_process_breakpoint(); + return 1; } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/kgdb_stub.c 2004-01-21 23:27:00.000000000 -0800 @@ -0,0 +1,2457 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (c) 2000 VERITAS Software Corporation. + * + */ +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * Updated by: David Grothe + * Updated by: Robert Walsh + * Updated by: wangdi + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Compatibility with 2.1.xx kernel by David Grothe + * + * Changes to allow auto initilization. All that is needed is that it + * be linked with the kernel and a break point (int 3) be executed. + * The header file defines BREAKPOINT to allow one to do + * this. It should also be possible, once the interrupt system is up, to + * call putDebugChar("+"). Once this is done, the remote debugger should + * get our attention by sending a ^C in a packet. George Anzinger + * + * Integrated into 2.2.5 kernel by Tigran Aivazian + * Added thread support, support for multiple processors, + * support for ia-32(x86) hardware debugging. + * Amit S. Kale ( akale@veritas.com ) + * + * Modified to support debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing an int 3. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? SNN (signal NN) + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $#. + * + * where + * :: + * :: < two hex digits computed as modulo 256 sum of > + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer. + * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ +#define KGDB_VERSION "<20030915.1651.33>" +#include +#include +#include /* for strcpy */ +#include +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include +#include +#include + +/************************************************************************ + * + * external low-level support routines + */ +typedef void (*Function) (void); /* pointer to a function */ + +/* Thread reference */ +typedef unsigned char threadref[8]; + +extern int tty_putDebugChar(int); /* write a single character */ +extern int tty_getDebugChar(void); /* read and return a single char */ +extern void tty_flushDebugChar(void); /* flush pending characters */ +extern int eth_putDebugChar(int); /* write a single character */ +extern int eth_getDebugChar(void); /* read and return a single char */ +extern void eth_flushDebugChar(void); /* flush pending characters */ + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 400 + +char *kgdb_version = KGDB_VERSION; + +/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ +int debug_regs = 0; /* set to non-zero to print registers */ + +/* filled in by an external module */ +char *gdb_module_offsets; + +static const char hexchars[] = "0123456789abcdef"; + +/* Number of bytes of registers. */ +#define NUMREGBYTES 64 +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + */ +enum regnames { _EAX, /* 0 */ + _ECX, /* 1 */ + _EDX, /* 2 */ + _EBX, /* 3 */ + _ESP, /* 4 */ + _EBP, /* 5 */ + _ESI, /* 6 */ + _EDI, /* 7 */ + _PC /* 8 also known as eip */ , + _PS /* 9 also known as eflags */ , + _CS, /* 10 */ + _SS, /* 11 */ + _DS, /* 12 */ + _ES, /* 13 */ + _FS, /* 14 */ + _GS /* 15 */ +}; + +/*************************** ASSEMBLY CODE MACROS *************************/ +/* + * Put the error code here just in case the user cares. + * Likewise, the vector number here (since GDB only gets the signal + * number through the usual means, and that's not very specific). + * The called_from is the return address so he can tell how we entered kgdb. + * This will allow him to seperate out the various possible entries. + */ +#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ + +#define PID_MAX PID_MAX_DEFAULT + +#ifdef CONFIG_SMP +void smp_send_nmi_allbutself(void); +#define IF_SMP(x) x +#undef MAX_NO_CPUS +#ifndef CONFIG_NO_KGDB_CPUS +#define CONFIG_NO_KGDB_CPUS 2 +#endif +#if CONFIG_NO_KGDB_CPUS > NR_CPUS +#define MAX_NO_CPUS NR_CPUS +#else +#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS +#endif +#define hold_init hold_on_sstep: 1, +#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) +#define NUM_CPUS num_online_cpus() +#else +#define IF_SMP(x) +#define hold_init +#undef MAX_NO_CPUS +#define MAX_NO_CPUS 1 +#define NUM_CPUS 1 +#endif +#define NOCPU (struct task_struct *)0xbad1fbad +/* *INDENT-OFF* */ +struct kgdb_info { + int used_malloc; + void *called_from; + long long entry_tsc; + int errcode; + int vector; + int print_debug_info; +#ifdef CONFIG_SMP + int hold_on_sstep; + struct { + volatile struct task_struct *task; + int pid; + int hold; + struct pt_regs *regs; + } cpus_waiting[MAX_NO_CPUS]; +#endif +} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; + +/* *INDENT-ON* */ + +#define used_m kgdb_info.used_malloc +/* + * This is little area we set aside to contain the stack we + * need to build to allow gdb to call functions. We use one + * per cpu to avoid locking issues. We will do all this work + * with interrupts off so that should take care of the protection + * issues. + */ +#define LOOKASIDE_SIZE 200 /* should be more than enough */ +#define MALLOC_MAX 200 /* Max malloc size */ +struct { + unsigned int esp; + int array[LOOKASIDE_SIZE]; +} fn_call_lookaside[MAX_NO_CPUS]; + +static int trap_cpu; +static unsigned int OLD_esp; + +#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] +#define IF_BIT 0x200 +#define TF_BIT 0x100 + +#define MALLOC_ROUND 8-1 + +static char malloc_array[MALLOC_MAX]; +IF_SMP(static void to_gdb(const char *mess)); +void * +malloc(int size) +{ + + if (size <= (MALLOC_MAX - used_m)) { + int old_used = used_m; + used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); + return &malloc_array[old_used]; + } else { + return NULL; + } +} + +/* + * I/O dispatch functions... + * Based upon kgdboe, either call the ethernet + * handler or the serial one.. + */ +void +putDebugChar(int c) +{ + if (!kgdboe) { + tty_putDebugChar(c); + } else { + eth_putDebugChar(c); + } +} + +int +getDebugChar(void) +{ + if (!kgdboe) { + return tty_getDebugChar(); + } else { + return eth_getDebugChar(); + } +} + +void +flushDebugChar(void) +{ + if (!kgdboe) { + tty_flushDebugChar(); + } else { + eth_flushDebugChar(); + } +} + +/* + * Gdb calls functions by pushing agruments, including a return address + * on the stack and the adjusting EIP to point to the function. The + * whole assumption in GDB is that we are on a different stack than the + * one the "user" i.e. code that hit the break point, is on. This, of + * course is not true in the kernel. Thus various dodges are needed to + * do the call without directly messing with EIP (which we can not change + * as it is just a location and not a register. To adjust it would then + * require that we move every thing below EIP up or down as needed. This + * will not work as we may well have stack relative pointer on the stack + * (such as the pointer to regs, for example). + + * So here is what we do: + * We detect gdb attempting to store into the stack area and instead, store + * into the fn_call_lookaside.array at the same relative location as if it + * were the area ESP pointed at. We also trap ESP modifications + * and uses these to adjust fn_call_lookaside.esp. On entry + * fn_call_lookaside.esp will be set to point at the last entry in + * fn_call_lookaside.array. This allows us to check if it has changed, and + * if so, on exit, we add the registers we will use to do the move and a + * trap/ interrupt return exit sequence. We then adjust the eflags in the + * regs array (remember we now have a copy in the fn_call_lookaside.array) to + * kill the interrupt bit, AND we change EIP to point at our set up stub. + * As part of the register set up we preset the registers to point at the + * begining and end of the fn_call_lookaside.array, so all the stub needs to + * do is move words from the array to the stack until ESP= the desired value + * then do the rti. This will then transfer to the desired function with + * all the correct registers. Nifty huh? + */ +extern asmlinkage void fn_call_stub(void); +extern asmlinkage void fn_rtn_stub(void); +/* *INDENT-OFF* */ +__asm__("fn_rtn_stub:\n\t" + "movl %eax,%esp\n\t" + "fn_call_stub:\n\t" + "1:\n\t" + "addl $-4,%ebx\n\t" + "movl (%ebx), %eax\n\t" + "pushl %eax\n\t" + "cmpl %esp,%ecx\n\t" + "jne 1b\n\t" + "popl %eax\n\t" + "popl %ebx\n\t" + "popl %ecx\n\t" + "iret \n\t"); +/* *INDENT-ON* */ +#define gdb_i386vector kgdb_info.vector +#define gdb_i386errcode kgdb_info.errcode +#define waiting_cpus kgdb_info.cpus_waiting +#define remote_debug kgdb_info.print_debug_info +#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold +/* gdb locks */ + +#ifdef CONFIG_SMP +static int in_kgdb_called; +static spinlock_t waitlocks[MAX_NO_CPUS] = + {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; +/* + * The following array has the thread pointer of each of the "other" + * cpus. We make it global so it can be seen by gdb. + */ +volatile int in_kgdb_entry_log[MAX_NO_CPUS]; +volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; +/* +static spinlock_t continuelocks[MAX_NO_CPUS]; +*/ +spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; +/* waiters on our spinlock plus us */ +static atomic_t spinlock_waiters = ATOMIC_INIT(1); +static int spinlock_count = 0; +static int spinlock_cpu = 0; +/* + * Note we use nested spin locks to account for the case where a break + * point is encountered when calling a function by user direction from + * kgdb. Also there is the memory exception recursion to account for. + * Well, yes, but this lets other cpus thru too. Lets add a + * cpu id to the lock. + */ +#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ + spinlock_cpu != smp_processor_id()){\ + atomic_inc(&spinlock_waiters); \ + while (! spin_trylock(x)) {\ + in_kgdb(®s);\ + }\ + atomic_dec(&spinlock_waiters); \ + spinlock_count = 1; \ + spinlock_cpu = smp_processor_id(); \ + }else{ \ + spinlock_count++; \ + } +#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) +#else +unsigned kgdb_spinlock = 0; +#define KGDB_SPIN_LOCK(x) --*x +#define KGDB_SPIN_UNLOCK(x) ++*x +#endif + +int +hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return (ch - 'a' + 10); + if ((ch >= '0') && (ch <= '9')) + return (ch - '0'); + if ((ch >= 'A') && (ch <= 'F')) + return (ch - 'A' + 10); + return (-1); +} + +/* scan for the sequence $# */ +void +getpacket(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + char ch; + + do { + /* wait around for the start character, ignore all other characters */ + while ((ch = (getDebugChar() & 0x7f)) != '$') ; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < BUFMAX) { + ch = getDebugChar() & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(getDebugChar() & 0x7f) << 4; + xmitcsum += hex(getDebugChar() & 0x7f); + if ((remote_debug) && (checksum != xmitcsum)) { + printk + ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", + checksum, xmitcsum, buffer); + } + + if (checksum != xmitcsum) + putDebugChar('-'); /* failed checksum */ + else { + putDebugChar('+'); /* successful transfer */ + /* if a sequence char is present, reply the sequence ID */ + if (buffer[2] == ':') { + putDebugChar(buffer[0]); + putDebugChar(buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(buffer); + for (i = 3; i <= count; i++) + buffer[i - 3] = buffer[i]; + } + } + } + } while (checksum != xmitcsum); + + if (remote_debug) + printk("R:%s\n", buffer); + flushDebugChar(); +} + +/* send the packet in buffer. */ + +void +putpacket(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + /* $#. */ + + if (!kgdboe) { + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + putDebugChar(ch); + checksum += ch; + count += 1; + } + + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + + } while ((getDebugChar() & 0x7f) != '+'); + } else { + /* + * For udp, we can not transfer too much bytes once. + * We only transfer MAX_SEND_COUNT size bytes each time + */ + +#define MAX_SEND_COUNT 30 + + int send_count = 0, i = 0; + char send_buf[MAX_SEND_COUNT]; + + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + send_count = 0; + while ((ch = buffer[count])) { + if (send_count >= MAX_SEND_COUNT) { + for(i = 0; i < MAX_SEND_COUNT; i++) { + putDebugChar(send_buf[i]); + } + flushDebugChar(); + send_count = 0; + } else { + send_buf[send_count] = ch; + checksum += ch; + count ++; + send_count++; + } + } + for(i = 0; i < send_count; i++) + putDebugChar(send_buf[i]); + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + } while ((getDebugChar() & 0x7f) != '+'); + } +} + +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; +static short error; + +void +debug_error(char *format, char *parm) +{ + if (remote_debug) + printk(format, parm); +} + +static void +print_regs(struct pt_regs *regs) +{ + printk("EAX=%08lx ", regs->eax); + printk("EBX=%08lx ", regs->ebx); + printk("ECX=%08lx ", regs->ecx); + printk("EDX=%08lx ", regs->edx); + printk("\n"); + printk("ESI=%08lx ", regs->esi); + printk("EDI=%08lx ", regs->edi); + printk("EBP=%08lx ", regs->ebp); + printk("ESP=%08lx ", (long) ®s->esp); + printk("\n"); + printk(" DS=%08x ", regs->xds); + printk(" ES=%08x ", regs->xes); + printk(" SS=%08x ", __KERNEL_DS); + printk(" FL=%08lx ", regs->eflags); + printk("\n"); + printk(" CS=%08x ", regs->xcs); + printk(" IP=%08lx ", regs->eip); +#if 0 + printk(" FS=%08x ", regs->fs); + printk(" GS=%08x ", regs->gs); +#endif + printk("\n"); + +} /* print_regs */ + +#define NEW_esp fn_call_lookaside[trap_cpu].esp + +static void +regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_EAX] = regs->eax; + gdb_regs[_EBX] = regs->ebx; + gdb_regs[_ECX] = regs->ecx; + gdb_regs[_EDX] = regs->edx; + gdb_regs[_ESI] = regs->esi; + gdb_regs[_EDI] = regs->edi; + gdb_regs[_EBP] = regs->ebp; + gdb_regs[_DS] = regs->xds; + gdb_regs[_ES] = regs->xes; + gdb_regs[_PS] = regs->eflags; + gdb_regs[_CS] = regs->xcs; + gdb_regs[_PC] = regs->eip; + /* Note, as we are a debugging the kernel, we will always + * trap in kernel code, this means no priviledge change, + * and so the pt_regs structure is not completely valid. In a non + * privilege change trap, only EFLAGS, CS and EIP are put on the stack, + * SS and ESP are not stacked, this means that the last 2 elements of + * pt_regs is not valid (they would normally refer to the user stack) + * also, using regs+1 is no good because you end up will a value that is + * 2 longs (8) too high. This used to cause stepping over functions + * to fail, so my fix is to use the address of regs->esp, which + * should point at the end of the stack frame. Note I have ignored + * completely exceptions that cause an error code to be stacked, such + * as double fault. Stuart Hughes, Zentropix. + * original code: gdb_regs[_ESP] = (int) (regs + 1) ; + + * this is now done on entry and moved to OLD_esp (as well as NEW_esp). + */ + gdb_regs[_ESP] = NEW_esp; + gdb_regs[_SS] = __KERNEL_DS; + gdb_regs[_FS] = 0xFFFF; + gdb_regs[_GS] = 0xFFFF; +} /* regs_to_gdb_regs */ + +static void +gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) +{ + regs->eax = gdb_regs[_EAX]; + regs->ebx = gdb_regs[_EBX]; + regs->ecx = gdb_regs[_ECX]; + regs->edx = gdb_regs[_EDX]; + regs->esi = gdb_regs[_ESI]; + regs->edi = gdb_regs[_EDI]; + regs->ebp = gdb_regs[_EBP]; + regs->xds = gdb_regs[_DS]; + regs->xes = gdb_regs[_ES]; + regs->eflags = gdb_regs[_PS]; + regs->xcs = gdb_regs[_CS]; + regs->eip = gdb_regs[_PC]; + NEW_esp = gdb_regs[_ESP]; /* keep the value */ +#if 0 /* can't change these */ + regs->esp = gdb_regs[_ESP]; + regs->xss = gdb_regs[_SS]; + regs->fs = gdb_regs[_FS]; + regs->gs = gdb_regs[_GS]; +#endif + +} /* gdb_regs_to_regs */ +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched ((unsigned long) scheduling_functions_start_here) +#define last_sched ((unsigned long) scheduling_functions_end_here) + +int thread_list = 0; + +void +get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) +{ + unsigned long stack_page; + int count = 0; + IF_SMP(int i); + if (!p || p == current) { + regs_to_gdb_regs(gdb_regs, regs); + return; + } +#ifdef CONFIG_SMP + for (i = 0; i < MAX_NO_CPUS; i++) { + if (p == kgdb_info.cpus_waiting[i].task) { + regs_to_gdb_regs(gdb_regs, + kgdb_info.cpus_waiting[i].regs); + gdb_regs[_ESP] = + (int) &kgdb_info.cpus_waiting[i].regs->esp; + + return; + } + } +#endif + memset(gdb_regs, 0, NUMREGBYTES); + gdb_regs[_ESP] = p->thread.esp; + gdb_regs[_PC] = p->thread.eip; + gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; + gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); + gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); + +/* + * This code is to give a more informative notion of where a process + * is waiting. It is used only when the user asks for a thread info + * list. If he then switches to the thread, s/he will find the task + * is in schedule, but a back trace should show the same info we come + * up with. This code was shamelessly purloined from process.c. It was + * then enhanced to provide more registers than simply the program + * counter. + */ + + if (!thread_list) { + return; + } + + if (p->state == TASK_RUNNING) + return; + stack_page = (unsigned long) p->thread_info; + if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > 8188 + stack_page) + return; + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + do { + if (gdb_regs[_EBP] < stack_page || + gdb_regs[_EBP] > 8184 + stack_page) + return; + gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); + gdb_regs[_ESP] = gdb_regs[_EBP] + 8; + gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; + if (gdb_regs[_PC] < first_sched || gdb_regs[_PC] >= last_sched) + return; + } while (count++ < 16); + return; +} + +/* Indicate to caller of mem2hex or hex2mem that there has been an + error. */ +static volatile int mem_err = 0; +static volatile int mem_err_expected = 0; +static volatile int mem_err_cnt = 0; +static int garbage_loc = -1; + +int +get_char(char *addr) +{ + return *addr; +} + +void +set_char(char *addr, int val, int may_fault) +{ + /* + * This code traps references to the area mapped to the kernel + * stack as given by the regs and, instead, stores to the + * fn_call_lookaside[cpu].array + */ + if (may_fault && + (unsigned int) addr < OLD_esp && + ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { + addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); + } + *addr = val; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* return a pointer to the last char put in buf (null) */ +/* If MAY_FAULT is non-zero, then we should set mem_err in response to + a fault; if zero treat a fault like any other fault in the stub. */ +char * +mem2hex(char *mem, char *buf, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + /* printk("%lx = ", mem) ; */ + + ch = get_char(mem++); + + /* printk("%02x\n", ch & 0xFF) ; */ + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault fetching from addr %lx\n", + (long) (mem - 1)); + *buf = 0; /* truncate buffer */ + return (buf); + } + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (may_fault) + mem_err_expected = 0; + return (buf); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return a pointer to the character AFTER the last byte written */ +/* NOTE: We use the may fault flag to also indicate if the write is to + * the registers (0) or "other" memory (!=0) + */ +char * +hex2mem(char *buf, char *mem, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + ch = hex(*buf++) << 4; + ch = ch + hex(*buf++); + set_char(mem++, ch, may_fault); + + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault storing to addr %lx\n", + (long) (mem - 1)); + return (mem); + } + } + if (may_fault) + mem_err_expected = 0; + return (mem); +} + +/**********************************************/ +/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ +/* RETURN NUMBER OF CHARS PROCESSED */ +/**********************************************/ +int +hexToInt(char **ptr, int *intValue) +{ + int numChars = 0; + int hexValue; + + *intValue = 0; + + while (**ptr) { + hexValue = hex(**ptr); + if (hexValue >= 0) { + *intValue = (*intValue << 4) | hexValue; + numChars++; + } else + break; + + (*ptr)++; + } + + return (numChars); +} + +#define stubhex(h) hex(h) +#ifdef old_thread_list + +static int +stub_unpack_int(char *buff, int fieldlength) +{ + int nibble; + int retval = 0; + + while (fieldlength) { + nibble = stubhex(*buff++); + retval |= nibble; + fieldlength--; + if (fieldlength) + retval = retval << 4; + } + return retval; +} +#endif +static char * +pack_hex_byte(char *pkt, int byte) +{ + *pkt++ = hexchars[(byte >> 4) & 0xf]; + *pkt++ = hexchars[(byte & 0xf)]; + return pkt; +} + +#define BUF_THREAD_ID_SIZE 16 + +static char * +pack_threadid(char *pkt, threadref * id) +{ + char *limit; + unsigned char *altid; + + altid = (unsigned char *) id; + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *altid++); + return pkt; +} + +#ifdef old_thread_list +static char * +unpack_byte(char *buf, int *value) +{ + *value = stub_unpack_int(buf, 2); + return buf + 2; +} + +static char * +unpack_threadid(char *inbuf, threadref * id) +{ + char *altref; + char *limit = inbuf + BUF_THREAD_ID_SIZE; + int x, y; + + altref = (char *) id; + + while (inbuf < limit) { + x = stubhex(*inbuf++); + y = stubhex(*inbuf++); + *altref++ = (x << 4) | y; + } + return inbuf; +} +#endif +void +int_to_threadref(threadref * id, int value) +{ + unsigned char *scan; + + scan = (unsigned char *) id; + { + int i = 4; + while (i--) + *scan++ = 0; + } + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} +int +int_to_hex_v(unsigned char * id, int value) +{ + unsigned char *start = id; + int shift; + int ch; + + for (shift = 28; shift >= 0; shift -= 4) { + if ((ch = (value >> shift) & 0xf) || (id != start)) { + *id = hexchars[ch]; + id++; + } + } + if (id == start) + *id++ = '0'; + return id - start; +} +#ifdef old_thread_list + +static int +threadref_to_int(threadref * ref) +{ + int i, value = 0; + unsigned char *scan; + + scan = (char *) ref; + scan += 4; + i = 4; + while (i-- > 0) + value = (value << 8) | ((*scan++) & 0xff); + return value; +} +#endif +static int +cmp_str(char *s1, char *s2, int count) +{ + while (count--) { + if (*s1++ != *s2++) + return 0; + } + return 1; +} + +#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ +extern struct task_struct *kgdb_get_idle(int cpu); +#define idle_task(cpu) kgdb_get_idle(cpu) +#else +#define idle_task(cpu) init_tasks[cpu] +#endif + +extern int kgdb_pid_init_done; + +struct task_struct * +getthread(int pid) +{ + struct task_struct *thread; + if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { + + return idle_task(pid - PID_MAX); + } else { + /* + * find_task_by_pid is relatively safe all the time + * Other pid functions require lock downs which imply + * that we may be interrupting them (as we get here + * in the middle of most any lock down). + * Still we don't want to call until the table exists! + */ + if (kgdb_pid_init_done){ + thread = find_task_by_pid(pid); + if (thread) { + return thread; + } + } + } + return NULL; +} +/* *INDENT-OFF* */ +struct hw_breakpoint { + unsigned enabled; + unsigned type; + unsigned len; + unsigned addr; +} breakinfo[4] = { {enabled:0}, + {enabled:0}, + {enabled:0}, + {enabled:0}}; +/* *INDENT-ON* */ +unsigned hw_breakpoint_status; +void +correct_hw_break(void) +{ + int breakno; + int correctit; + int breakbit; + unsigned dr7; + + asm volatile ("movl %%db7, %0\n":"=r" (dr7) + :); + /* *INDENT-OFF* */ + do { + unsigned addr0, addr1, addr2, addr3; + asm volatile ("movl %%db0, %0\n" + "movl %%db1, %1\n" + "movl %%db2, %2\n" + "movl %%db3, %3\n" + :"=r" (addr0), "=r"(addr1), + "=r"(addr2), "=r"(addr3) + :); + } while (0); + /* *INDENT-ON* */ + correctit = 0; + for (breakno = 0; breakno < 3; breakno++) { + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + dr7 |= (((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << 16) << + (breakno << 2); + switch (breakno) { + case 0: + asm volatile ("movl %0, %%dr0\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 1: + asm volatile ("movl %0, %%dr1\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 2: + asm volatile ("movl %0, %%dr2\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 3: + asm volatile ("movl %0, %%dr3\n"::"r" + (breakinfo[breakno].addr)); + break; + } + } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + } + } + if (correctit) { + asm volatile ("movl %0, %%db7\n"::"r" (dr7)); + } +} + +int +remove_hw_break(unsigned breakno) +{ + if (!breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 0; + return 0; +} + +int +set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) +{ + if (breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 1; + breakinfo[breakno].type = type; + breakinfo[breakno].len = len; + breakinfo[breakno].addr = addr; + return 0; +} + +#ifdef CONFIG_SMP +static int in_kgdb_console = 0; + +int +in_kgdb(struct pt_regs *regs) +{ + unsigned flags; + int cpu = smp_processor_id(); + in_kgdb_called = 1; + if (!spin_is_locked(&kgdb_spinlock)) { + if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ + in_kgdb_console) { /* or we are doing slow i/o */ + return 1; + } + return 0; + } + + /* As I see it the only reason not to let all cpus spin on + * the same spin_lock is to allow selected ones to proceed. + * This would be a good thing, so we leave it this way. + * Maybe someday.... Done ! + + * in_kgdb() is called from an NMI so we don't pretend + * to have any resources, like printk() for example. + */ + + kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ + /* + * log arival of this cpu + * The NMI keeps on ticking. Protect against recurring more + * than once, and ignor the cpu that has the kgdb lock + */ + in_kgdb_entry_log[cpu]++; + in_kgdb_here_log[cpu] = regs; + if (cpu == spinlock_cpu || waiting_cpus[cpu].task) + goto exit_in_kgdb; + + /* + * For protection of the initilization of the spin locks by kgdb + * it locks the kgdb spinlock before it gets the wait locks set + * up. We wait here for the wait lock to be taken. If the + * kgdb lock goes away first?? Well, it could be a slow exit + * sequence where the wait lock is removed prior to the kgdb lock + * so if kgdb gets unlocked, we just exit. + */ + + while (spin_is_locked(&kgdb_spinlock) && + !spin_is_locked(waitlocks + cpu)) ; + if (!spin_is_locked(&kgdb_spinlock)) + goto exit_in_kgdb; + + waiting_cpus[cpu].task = current; + waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu); + waiting_cpus[cpu].regs = regs; + + spin_unlock_wait(waitlocks + cpu); + + /* + * log departure of this cpu + */ + waiting_cpus[cpu].task = 0; + waiting_cpus[cpu].pid = 0; + waiting_cpus[cpu].regs = 0; + correct_hw_break(); + exit_in_kgdb: + in_kgdb_here_log[cpu] = 0; + kgdb_local_irq_restore(flags); + return 1; + /* + spin_unlock(continuelocks + smp_processor_id()); + */ +} + +void +smp__in_kgdb(struct pt_regs regs) +{ + ack_APIC_irq(); + in_kgdb(®s); +} +#else +int +in_kgdb(struct pt_regs *regs) +{ + return (kgdb_spinlock); +} +#endif + +void +printexceptioninfo(int exceptionNo, int errorcode, char *buffer) +{ + unsigned dr6; + int i; + switch (exceptionNo) { + case 1: /* debug exception */ + break; + case 3: /* breakpoint */ + sprintf(buffer, "Software breakpoint"); + return; + default: + sprintf(buffer, "Details not available"); + return; + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (dr6 & 0x4000) { + sprintf(buffer, "Single step"); + return; + } + for (i = 0; i < 4; ++i) { + if (dr6 & (1 << i)) { + sprintf(buffer, "Hardware breakpoint %d", i); + return; + } + } + sprintf(buffer, "Unknown trap"); + return; +} + +/* + * This function does all command procesing for interfacing to gdb. + * + * NOTE: The INT nn instruction leaves the state of the interrupt + * enable flag UNCHANGED. That means that when this routine + * is entered via a breakpoint (INT 3) instruction from code + * that has interrupts enabled, then interrupts will STILL BE + * enabled when this routine is entered. The first thing that + * we do here is disable interrupts so as to prevent recursive + * entries and bothersome serial interrupts while we are + * trying to run the serial port in polled mode. + * + * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so + * it is always necessary to do a restore_flags before returning + * so as to let go of that lock. + */ +int +kgdb_handle_exception(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs) +{ + struct task_struct *usethread = NULL; + struct task_struct *thread_list_start = 0, *thread = NULL; + int addr, length; + int breakno, breaktype; + char *ptr; + int newPC; + threadref thref; + int threadid; + int thread_min = PID_MAX + MAX_NO_CPUS; +#ifdef old_thread_list + int maxthreads; +#endif + int nothreads; + unsigned long flags; + int gdb_regs[NUMREGBYTES / 4]; + int dr6; + IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ +#define NO_NMI 1 +#define NO_SYNC 2 +#define regs (*linux_regs) +#define NUMREGS NUMREGBYTES/4 + /* + * If the entry is not from the kernel then return to the Linux + * trap handler and let it process the interrupt normally. + */ + if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { + printk("ignoring non-kernel exception\n"); + print_regs(®s); + return (0); + } + /* + * If we're using eth mode, set the 'mode' in the netdevice. + */ + + if (kgdboe) + netpoll_set_trap(1); + + kgdb_local_irq_save(flags); + + /* Get kgdb spinlock */ + + KGDB_SPIN_LOCK(&kgdb_spinlock); + rdtscll(kgdb_info.entry_tsc); + /* + * We depend on this spinlock and the NMI watch dog to control the + * other cpus. They will arrive at "in_kgdb()" as a result of the + * NMI and will wait there for the following spin locks to be + * released. + */ +#ifdef CONFIG_SMP + +#if 0 + if (cpu_callout_map & ~MAX_CPU_MASK) { + printk("kgdb : too many cpus, possibly not mapped" + " in contiguous space, change MAX_NO_CPUS" + " in kgdb_stub and make new kernel.\n" + " cpu_callout_map is %lx\n", cpu_callout_map); + goto exit_just_unlock; + } +#endif + if (spinlock_count == 1) { + int time = 0, end_time, dum = 0; + int i; + int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) + }; + if (remote_debug) { + printk("kgdb : cpu %d entry, syncing others\n", + smp_processor_id()); + } + for (i = 0; i < MAX_NO_CPUS; i++) { + /* + * Use trylock as we may already hold the lock if + * we are holding the cpu. Net result is all + * locked. + */ + spin_trylock(&waitlocks[i]); + } + for (i = 0; i < MAX_NO_CPUS; i++) + cpu_logged_in[i] = 0; + /* + * Wait for their arrival. We know the watch dog is active if + * in_kgdb() has ever been called, as it is always called on a + * watchdog tick. + */ + rdtsc(dum, time); + end_time = time + 2; /* Note: we use the High order bits! */ + i = 1; + if (num_online_cpus() > 1) { + int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; + smp_send_nmi_allbutself(); + + while (i < num_online_cpus() && time != end_time) { + int j; + for (j = 0; j < MAX_NO_CPUS; j++) { + if (waiting_cpus[j].task && + waiting_cpus[j].task != NOCPU && + !cpu_logged_in[j]) { + i++; + cpu_logged_in[j] = 1; + if (remote_debug) { + printk + ("kgdb : cpu %d arrived at kgdb\n", + j); + } + break; + } else if (!waiting_cpus[j].task && + !cpu_online(j)) { + waiting_cpus[j].task = NOCPU; + cpu_logged_in[j] = 1; + waiting_cpus[j].hold = 1; + break; + } + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + + int wait = 100000; + while (wait--) ; + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + printk + ("kgdb : cpu %d stall" + " in in_kgdb\n", + j); + i++; + cpu_logged_in[j] = 1; + waiting_cpus[j].task = + (struct task_struct + *) 1; + } + } + } + + if (in_kgdb_entry_log[smp_processor_id()] > + (me_in_kgdb + 10)) { + break; + } + + rdtsc(dum, time); + } + if (i < num_online_cpus()) { + printk + ("kgdb : time out, proceeding without sync\n"); +#if 0 + printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", + waiting_cpus[0].task != 0, + waiting_cpus[1].task != 0); + printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", + cpu_logged_in[0], cpu_logged_in[1]); + printk + ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", + in_kgdb_here_log[0] != 0, + in_kgdb_here_log[1] != 0); +#endif + entry_state = NO_SYNC; + } else { +#if 0 + int ent = + in_kgdb_entry_log[smp_processor_id()] - + me_in_kgdb; + printk("kgdb : sync after %d entries\n", ent); +#endif + } + } else { + if (remote_debug) { + printk + ("kgdb : %d cpus, but watchdog not active\n" + "proceeding without locking down other cpus\n", + num_online_cpus()); + entry_state = NO_NMI; + } + } + } +#endif + + if (remote_debug) { + unsigned long *lp = (unsigned long *) &linux_regs; + + printk("handle_exception(exceptionVector=%d, " + "signo=%d, err_code=%d, linux_regs=%p)\n", + exceptionVector, signo, err_code, linux_regs); + if (debug_regs) { + print_regs(®s); + printk("Stk: %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[0], lp[1], lp[2], lp[3], + lp[4], lp[5], lp[6], lp[7]); + printk(" %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[8], lp[9], lp[10], lp[11], + lp[12], lp[13], lp[14], lp[15]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[16], lp[17], lp[18], lp[19], + lp[20], lp[21], lp[22], lp[23]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[24], lp[25], lp[26], lp[27], + lp[28], lp[29], lp[30], lp[31]); + } + } + + /* Disable hardware debugging while we are in kgdb */ + /* Get the debug register status register */ +/* *INDENT-OFF* */ + __asm__("movl %0,%%db7" + : /* no output */ + :"r"(0)); + + asm volatile ("movl %%db6, %0\n" + :"=r" (hw_breakpoint_status) + :); + +/* *INDENT-ON* */ + switch (exceptionVector) { + case 0: /* divide error */ + case 1: /* debug exception */ + case 2: /* NMI */ + case 3: /* breakpoint */ + case 4: /* overflow */ + case 5: /* bounds check */ + case 6: /* invalid opcode */ + case 7: /* device not available */ + case 8: /* double fault (errcode) */ + case 10: /* invalid TSS (errcode) */ + case 12: /* stack fault (errcode) */ + case 16: /* floating point error */ + case 17: /* alignment check (errcode) */ + default: /* any undocumented */ + break; + case 11: /* segment not present (errcode) */ + case 13: /* general protection (errcode) */ + case 14: /* page fault (special errcode) */ + case 19: /* cache flush denied */ + if (mem_err_expected) { + /* + * This fault occured because of the + * get_char or set_char routines. These + * two routines use either eax of edx to + * indirectly reference the location in + * memory that they are working with. + * For a page fault, when we return the + * instruction will be retried, so we + * have to make sure that these + * registers point to valid memory. + */ + mem_err = 1; /* set mem error flag */ + mem_err_expected = 0; + mem_err_cnt++; /* helps in debugging */ + /* make valid address */ + regs.eax = (long) &garbage_loc; + /* make valid address */ + regs.edx = (long) &garbage_loc; + if (remote_debug) + printk("Return after memory error: " + "mem_err_cnt=%d\n", mem_err_cnt); + if (debug_regs) + print_regs(®s); + goto exit_kgdb; + } + break; + } + if (remote_debug) + printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); + + gdb_i386vector = exceptionVector; + gdb_i386errcode = err_code; + kgdb_info.called_from = __builtin_return_address(0); +#ifdef CONFIG_SMP + /* + * OK, we can now communicate, lets tell gdb about the sync. + * but only if we had a problem. + */ + switch (entry_state) { + case NO_NMI: + to_gdb("NMI not active, other cpus not stopped\n"); + break; + case NO_SYNC: + to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); + default:; + } + +#endif +/* + * Set up the gdb function call area. + */ + trap_cpu = smp_processor_id(); + OLD_esp = NEW_esp = (int) (&linux_regs->esp); + + IF_SMP(once_again:) + /* reply to host that an exception has occurred */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + + putpacket(remcomOutBuffer); + + while (1 == 1) { + error = 0; + remcomOutBuffer[0] = 0; + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + break; + case 'd': + remote_debug = !(remote_debug); /* toggle debug flag */ + printk("Remote debug %s\n", + remote_debug ? "on" : "off"); + break; + case 'g': /* return the value of the CPU registers */ + get_gdb_regs(usethread, ®s, gdb_regs); + mem2hex((char *) gdb_regs, + remcomOutBuffer, NUMREGBYTES, 0); + break; + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem(&remcomInBuffer[1], + (char *) gdb_regs, NUMREGBYTES, 0); + if (!usethread || usethread == current) { + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E00"); + } + break; + + case 'P':{ /* set the value of a single CPU register - + return OK */ + /* + * For some reason, gdb wants to talk about psudo + * registers (greater than 15). These may have + * meaning for ptrace, but for us it is safe to + * ignor them. We do this by dumping them into + * _GS which we also ignor, but do have memory for. + */ + int regno; + + ptr = &remcomInBuffer[1]; + regs_to_gdb_regs(gdb_regs, ®s); + if ((!usethread || usethread == current) && + hexToInt(&ptr, ®no) && + *ptr++ == '=' && (regno >= 0)) { + regno = + (regno >= NUMREGS ? _GS : regno); + hex2mem(ptr, (char *) &gdb_regs[regno], + 4, 0); + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + break; + } + strcpy(remcomOutBuffer, "E01"); + break; + } + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr) && + (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { + ptr = 0; + /* + * hex doubles the byte count + */ + if (length > (BUFMAX / 2)) + length = BUFMAX / 2; + mem2hex((char *) addr, + remcomOutBuffer, length, 1); + if (mem_err) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } + } + + if (ptr) { + strcpy(remcomOutBuffer, "E01"); + debug_error + ("malformed read memory command: %s\n", + remcomInBuffer); + } + break; + + /* MAA..AA,LLLL: + Write LLLL bytes at address AA.AA return OK */ + case 'M': + /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr) && + (*(ptr++) == ',') && + (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { + hex2mem(ptr, (char *) addr, length, 1); + + if (mem_err) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } else { + strcpy(remcomOutBuffer, "OK"); + } + + ptr = 0; + } + if (ptr) { + strcpy(remcomOutBuffer, "E02"); + debug_error + ("malformed write memory command: %s\n", + remcomInBuffer); + } + break; + case 'S': + remcomInBuffer[0] = 's'; + case 'C': + /* Csig;AA..AA where ;AA..AA is optional + * continue with signal + * Since signals are meaning less to us, delete that + * part and then fall into the 'c' code. + */ + ptr = &remcomInBuffer[1]; + length = 2; + while (*ptr && *ptr != ';') { + length++; + ptr++; + } + if (*ptr) { + do { + ptr++; + *(ptr - length++) = *ptr; + } while (*ptr); + } else { + remcomInBuffer[1] = 0; + } + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + /* D detach, reply OK and then continue */ + case 'c': + case 's': + case 'D': + + /* try to read optional parameter, + pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) { + if (remote_debug) + printk("Changing EIP to 0x%x\n", addr); + + regs.eip = addr; + } + + newPC = regs.eip; + + /* clear the trace bit */ + regs.eflags &= 0xfffffeff; + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + regs.eflags |= 0x100; + + /* detach is a friendly version of continue. Note that + debugging is still enabled (e.g hit control C) + */ + if (remcomInBuffer[0] == 'D') { + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + } + + if (remote_debug) { + printk("Resuming execution\n"); + print_regs(®s); + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno) && + (breakinfo[breakno].type == 0)) { + /* Set restore flag */ + regs.eflags |= 0x10000; + break; + } + } + } + + if (kgdboe) + netpoll_set_trap(0); + + correct_hw_break(); + asm volatile ("movl %0, %%db6\n"::"r" (0)); + goto exit_kgdb; + + /* kill the program */ + case 'k': /* do nothing */ + break; + + /* query */ + case 'q': + nothreads = 0; + switch (remcomInBuffer[1]) { + case 'f': + threadid = 1; + thread_list = 2; + thread_list_start = (usethread ? : current); + case 's': + if (!cmp_str(&remcomInBuffer[2], + "ThreadInfo", 10)) + break; + + remcomOutBuffer[nothreads++] = 'm'; + for (; threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + nothreads += int_to_hex_v( + &remcomOutBuffer[ + nothreads], + threadid); + if (thread_min > threadid) + thread_min = threadid; + remcomOutBuffer[ + nothreads] = ','; + nothreads++; + if (nothreads > BUFMAX - 10) + break; + } + } + if (remcomOutBuffer[nothreads - 1] == 'm') { + remcomOutBuffer[nothreads - 1] = 'l'; + } else { + nothreads--; + } + remcomOutBuffer[nothreads] = 0; + break; + +#ifdef old_thread_list /* Old thread info request */ + case 'L': + /* List threads */ + thread_list = 2; + thread_list_start = (usethread ? : current); + unpack_byte(remcomInBuffer + 3, &maxthreads); + unpack_threadid(remcomInBuffer + 5, &thref); + do { + int buf_thread_limit = + (BUFMAX - 22) / BUF_THREAD_ID_SIZE; + if (maxthreads > buf_thread_limit) { + maxthreads = buf_thread_limit; + } + } while (0); + remcomOutBuffer[0] = 'q'; + remcomOutBuffer[1] = 'M'; + remcomOutBuffer[4] = '0'; + pack_threadid(remcomOutBuffer + 5, &thref); + + threadid = threadref_to_int(&thref); + for (nothreads = 0; + nothreads < maxthreads && + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(remcomOutBuffer + + 21 + + nothreads * 16, + &thref); + nothreads++; + if (thread_min > threadid) + thread_min = threadid; + } + } + + if (threadid == PID_MAX + MAX_NO_CPUS) { + remcomOutBuffer[4] = '1'; + } + pack_hex_byte(remcomOutBuffer + 2, nothreads); + remcomOutBuffer[21 + nothreads * 16] = '\0'; + break; +#endif + case 'C': + /* Current thread id */ + remcomOutBuffer[0] = 'Q'; + remcomOutBuffer[1] = 'C'; + threadid = current->pid; + if (!threadid) { + /* + * idle thread + */ + for (threadid = PID_MAX; + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + if (current == + idle_task(threadid - + PID_MAX)) + break; + } + } + int_to_threadref(&thref, threadid); + pack_threadid(remcomOutBuffer + 2, &thref); + remcomOutBuffer[18] = '\0'; + break; + + case 'E': + /* Print exception info */ + printexceptioninfo(exceptionVector, + err_code, remcomOutBuffer); + break; + case 'T':{ + char * nptr; + /* Thread extra info */ + if (!cmp_str(&remcomInBuffer[2], + "hreadExtraInfo,", 15)) { + break; + } + ptr = &remcomInBuffer[17]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + nptr = &thread->comm[0]; + length = 0; + ptr = &remcomOutBuffer[0]; + do { + length++; + ptr = pack_hex_byte(ptr, *nptr++); + } while (*nptr && length < 16); + /* + * would like that 16 to be the size of + * task_struct.comm but don't know the + * syntax.. + */ + *ptr = 0; + } + } + break; + + /* task related */ + case 'H': + switch (remcomInBuffer[1]) { + case 'g': + ptr = &remcomInBuffer[2]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (!thread) { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + break; + } + /* + * Just in case I forget what this is all about, + * the "thread info" command to gdb causes it + * to ask for a thread list. It then switches + * to each thread and asks for the registers. + * For this (and only this) usage, we want to + * fudge the registers of tasks not on the run + * list (i.e. waiting) to show the routine that + * called schedule. Also, gdb, is a minimalist + * in that if the current thread is the last + * it will not re-read the info when done. + * This means that in this case we must show + * the real registers. So here is how we do it: + * Each entry we keep track of the min + * thread in the list (the last that gdb will) + * get info for. We also keep track of the + * starting thread. + * "thread_list" is cleared when switching back + * to the min thread if it is was current, or + * if it was not current, thread_list is set + * to 1. When the switch to current comes, + * if thread_list is 1, clear it, else do + * nothing. + */ + usethread = thread; + if ((thread_list == 1) && + (thread == thread_list_start)) { + thread_list = 0; + } + if (thread_list && (threadid == thread_min)) { + if (thread == thread_list_start) { + thread_list = 0; + } else { + thread_list = 1; + } + } + /* follow through */ + case 'c': + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (thread) { + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + if (thread_min > threadid) + thread_min = threadid; + } else { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + } + break; + + case 'Y': /* set up a hardware breakpoint */ + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + ptr++; + hexToInt(&ptr, &breaktype); + ptr++; + hexToInt(&ptr, &length); + ptr++; + hexToInt(&ptr, &addr); + if (set_hw_break(breakno & 0x3, + breaktype & 0x3, + length & 0x3, addr) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + /* Remove hardware breakpoint */ + case 'y': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + if (remove_hw_break(breakno & 0x3) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + case 'r': /* reboot */ + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + /*to_gdb("Rebooting\n"); */ + /* triplefault no return from here */ + { + static long no_idt[2]; + __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); + BREAKPOINT; + } + + } /* switch */ + + /* reply to the request */ + putpacket(remcomOutBuffer); + } /* while(1==1) */ + /* + * reached by goto only. + */ + exit_kgdb: + /* + * Here is where we set up to trap a gdb function call. NEW_esp + * will be changed if we are trying to do this. We handle both + * adding and subtracting, thus allowing gdb to put grung on + * the stack which it removes later. + */ + if (NEW_esp != OLD_esp) { + int *ptr = END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) + ptr -= (OLD_esp - NEW_esp) / sizeof (int); + *--ptr = linux_regs->eflags; + *--ptr = linux_regs->xcs; + *--ptr = linux_regs->eip; + *--ptr = linux_regs->ecx; + *--ptr = linux_regs->ebx; + *--ptr = linux_regs->eax; + linux_regs->ecx = NEW_esp - (sizeof (int) * 6); + linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) { + linux_regs->eip = (unsigned int) fn_call_stub; + } else { + linux_regs->eip = (unsigned int) fn_rtn_stub; + linux_regs->eax = NEW_esp; + } + linux_regs->eflags &= ~(IF_BIT | TF_BIT); + } +#ifdef CONFIG_SMP + /* + * Release gdb wait locks + * Sanity check time. Must have at least one cpu to run. Also single + * step must not be done if the current cpu is on hold. + */ + if (spinlock_count == 1) { + int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; + int cpu_avail = 0; + int i; + + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!cpu_online(i)) + break; + if (!hold_cpu(i)) { + cpu_avail = 1; + } + } + /* + * Early in the bring up there will be NO cpus on line... + */ + if (!cpu_avail && !cpus_empty(cpu_online_map)) { + to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); + goto once_again; + } + if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { + to_gdb + ("Current cpu must be unblocked to single step\n"); + goto once_again; + } + if (!(ss_hold)) { + int i; + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!hold_cpu(i)) { + spin_unlock(&waitlocks[i]); + } + } + } else { + spin_unlock(&waitlocks[smp_processor_id()]); + } + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + /* + * If this cpu is on hold, this is where we + * do it. Note, the NMI will pull us out of here, + * but will return as the above lock is not held. + * We will stay here till another cpu releases the lock for us. + */ + spin_unlock_wait(waitlocks + smp_processor_id()); + kgdb_local_irq_restore(flags); + return (0); + } +#if 0 +exit_just_unlock: +#endif +#endif + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + kgdb_local_irq_restore(flags); + return (0); +} + +/* this function is used to set up exception handlers for tracing and + * breakpoints. + * This function is not needed as the above line does all that is needed. + * We leave it for backward compatitability... + */ +void +set_debug_traps(void) +{ + /* + * linux_debug_hook is defined in traps.c. We store a pointer + * to our own exception handler into it. + + * But really folks, every hear of labeled common, an old Fortran + * concept. Lots of folks can reference it and it is define if + * anyone does. Only one can initialize it at link time. We do + * this with the hook. See the statement above. No need for any + * executable code and it is ready as soon as the kernel is + * loaded. Very desirable in kernel debugging. + + linux_debug_hook = handle_exception ; + */ + + /* In case GDB is started before us, ack any packets (presumably + "$?#xx") sitting there. + putDebugChar ('+'); + + initialized = 1; + */ +} + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. */ +/* But really, just use the BREAKPOINT macro. We will handle the int stuff + */ + +#ifdef later +/* + * possibly we should not go thru the traps.c code at all? Someday. + */ +void +do_kgdb_int3(struct pt_regs *regs, long error_code) +{ + kgdb_handle_exception(3, 5, error_code, regs); + return; +} +#endif +#undef regs +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS +asmlinkage void +bad_sys_call_exit(int stuff) +{ + struct pt_regs *regs = (struct pt_regs *) &stuff; + printk("Sys call %d return with %x preempt_count\n", + (int) regs->orig_eax, preempt_count()); +} +#endif +#ifdef CONFIG_STACK_OVERFLOW_TEST +#include +asmlinkage void +stack_overflow(void) +{ +#ifdef BREAKPOINT + BREAKPOINT; +#else + printk("Kernel stack overflow, looping forever\n"); +#endif + while (1) { + } +} +#endif + +#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) +char gdbconbuf[BUFMAX]; + +static void +kgdb_gdb_message(const char *s, unsigned count) +{ + int i; + int wcount; + char *bufptr; + /* + * This takes care of NMI while spining out chars to gdb + */ + IF_SMP(in_kgdb_console = 1); + gdbconbuf[0] = 'O'; + bufptr = gdbconbuf + 1; + while (count > 0) { + if ((count << 1) > (BUFMAX - 2)) { + wcount = (BUFMAX - 2) >> 1; + } else { + wcount = count; + } + count -= wcount; + for (i = 0; i < wcount; i++) { + bufptr = pack_hex_byte(bufptr, s[i]); + } + *bufptr = '\0'; + s += wcount; + + putpacket(gdbconbuf); + + } + IF_SMP(in_kgdb_console = 0); +} +#endif +#ifdef CONFIG_SMP +static void +to_gdb(const char *s) +{ + int count = 0; + while (s[count] && (count++ < BUFMAX)) ; + kgdb_gdb_message(s, count); +} +#endif +#ifdef CONFIG_KGDB_CONSOLE +#include +#include +#include +#include +#include + +void +kgdb_console_write(struct console *co, const char *s, unsigned count) +{ + + if (gdb_i386vector == -1) { + /* + * We have not yet talked to gdb. What to do... + * lets break, on continue we can do the write. + * But first tell him whats up. Uh, well no can do, + * as this IS the console. Oh well... + * We do need to wait or the messages will be lost. + * Other option would be to tell the above code to + * ignore this breakpoint and do an auto return, + * but that might confuse gdb. Also this happens + * early enough in boot up that we don't have the traps + * set up yet, so... + */ + breakpoint(); + } + kgdb_gdb_message(s, count); +} + +/* + * ------------------------------------------------------------ + * Serial KGDB driver + * ------------------------------------------------------------ + */ + +static struct console kgdbcons = { + name:"kgdb", + write:kgdb_console_write, +#ifdef CONFIG_KGDB_USER_CONSOLE + device:kgdb_console_device, +#endif + flags:CON_PRINTBUFFER | CON_ENABLED, + index:-1, +}; + +/* + * The trick here is that this file gets linked before printk.o + * That means we get to peer at the console info in the command + * line before it does. If we are up, we register, otherwise, + * do nothing. By returning 0, we allow printk to look also. + */ +static int kgdb_console_enabled; + +int __init +kgdb_console_init(char *str) +{ + if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { + register_console(&kgdbcons); + kgdb_console_enabled = 1; + } + return 0; /* let others look at the string */ +} + +__setup("console=", kgdb_console_init); + +#ifdef CONFIG_KGDB_USER_CONSOLE +static kdev_t kgdb_console_device(struct console *c); +/* This stuff sort of works, but it knocks out telnet devices + * we are leaving it here in case we (or you) find time to figure it out + * better.. + */ + +/* + * We need a real char device as well for when the console is opened for user + * space activities. + */ + +static int +kgdb_consdev_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t +kgdb_consdev_write(struct file *file, const char *buf, + size_t count, loff_t * ppos) +{ + int size, ret = 0; + static char kbuf[128]; + static DECLARE_MUTEX(sem); + + /* We are not reentrant... */ + if (down_interruptible(&sem)) + return -ERESTARTSYS; + + while (count > 0) { + /* need to copy the data from user space */ + size = count; + if (size > sizeof (kbuf)) + size = sizeof (kbuf); + if (copy_from_user(kbuf, buf, size)) { + ret = -EFAULT; + break;; + } + kgdb_console_write(&kgdbcons, kbuf, size); + count -= size; + ret += size; + buf += size; + } + + up(&sem); + + return ret; +} + +struct file_operations kgdb_consdev_fops = { + open:kgdb_consdev_open, + write:kgdb_consdev_write +}; +static kdev_t +kgdb_console_device(struct console *c) +{ + return MKDEV(TTYAUX_MAJOR, 1); +} + +/* + * This routine gets called from the serial stub in the i386/lib + * This is so it is done late in bring up (just before the console open). + */ +void +kgdb_console_finit(void) +{ + if (kgdb_console_enabled) { + char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); + char *cp = cptr; + while (*cptr && *cptr != '(') + cptr++; + *cptr = 0; + unregister_chrdev(TTYAUX_MAJOR, cp); + register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); + } +} +#endif +#endif +#ifdef CONFIG_KGDB_TS +#include /* time stamp code */ +#include /* in_interrupt */ +#ifdef CONFIG_KGDB_TS_64 +#define DATA_POINTS 64 +#endif +#ifdef CONFIG_KGDB_TS_128 +#define DATA_POINTS 128 +#endif +#ifdef CONFIG_KGDB_TS_256 +#define DATA_POINTS 256 +#endif +#ifdef CONFIG_KGDB_TS_512 +#define DATA_POINTS 512 +#endif +#ifdef CONFIG_KGDB_TS_1024 +#define DATA_POINTS 1024 +#endif +#ifndef DATA_POINTS +#define DATA_POINTS 128 /* must be a power of two */ +#endif +#define INDEX_MASK (DATA_POINTS - 1) +#if (INDEX_MASK & DATA_POINTS) +#error "CONFIG_KGDB_TS_COUNT must be a power of 2" +#endif +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + int data0; + int data1; +}; +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + struct task_struct *t1; + struct task_struct *t2; +}; +struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; + +struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; +int kgdb_and_then_count; + +void +kgdb_tstamp(int line, char *source, int data0, int data1) +{ + static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; + int flags; + kgdb_local_irq_save(flags); + spin_lock(&ts_spin); + rdtscll(kgdb_and_then->at_time); +#ifdef CONFIG_SMP + kgdb_and_then->on_cpu = smp_processor_id(); +#endif + kgdb_and_then->task = current; + kgdb_and_then->from_ln = line; + kgdb_and_then->in_src = source; + kgdb_and_then->from = __builtin_return_address(0); + kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | + (preempt_count() << 8)); + kgdb_and_then->data0 = data0; + kgdb_and_then->data1 = data1; + kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; + spin_unlock(&ts_spin); + kgdb_local_irq_restore(flags); +#ifdef CONFIG_PREEMPT + +#endif + return; +} +#endif +typedef int gdb_debug_hook(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs); +gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ + +static int kgdb_need_breakpoint[NR_CPUS]; + +void kgdb_schedule_breakpoint(void) +{ + kgdb_need_breakpoint[smp_processor_id()] = 1; +} + +void kgdb_process_breakpoint(void) +{ + /* + * Handle a breakpoint queued from inside network driver code + * to avoid reentrancy issues + */ + if (kgdb_need_breakpoint[smp_processor_id()]) { + kgdb_need_breakpoint[smp_processor_id()] = 0; + BREAKPOINT; + } +} + --- linux-2.6.2-rc1/arch/i386/kernel/ldt.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/ldt.c 2004-01-22 00:01:18.000000000 -0800 @@ -2,7 +2,7 @@ * linux/kernel/ldt.c * * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar + * Copyright (C) 1999, 2003 Ingo Molnar */ #include @@ -18,6 +18,8 @@ #include #include #include +#include +#include #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ static void flush_ldt(void *null) @@ -29,34 +31,31 @@ static void flush_ldt(void *null) static int alloc_ldt(mm_context_t *pc, int mincount, int reload) { - void *oldldt; - void *newldt; - int oldsize; + int oldsize, newsize, i; if (mincount <= pc->size) return 0; + /* + * LDT got larger - reallocate if necessary. + */ oldsize = pc->size; mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - pc->ldt = newldt; - wmb(); + newsize = mincount*LDT_ENTRY_SIZE; + for (i = 0; i < newsize; i += PAGE_SIZE) { + int nr = i/PAGE_SIZE; + BUG_ON(i >= 64*1024); + if (!pc->ldt_pages[nr]) { + pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER); + if (!pc->ldt_pages[nr]) + return -ENOMEM; + clear_highpage(pc->ldt_pages[nr]); + } + } pc->size = mincount; - wmb(); - if (reload) { #ifdef CONFIG_SMP cpumask_t mask; + preempt_disable(); load_LDT(pc); mask = cpumask_of_cpu(smp_processor_id()); @@ -67,21 +66,20 @@ static int alloc_ldt(mm_context_t *pc, i load_LDT(pc); #endif } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } return 0; } static inline int copy_ldt(mm_context_t *new, mm_context_t *old) { - int err = alloc_ldt(new, old->size, 0); - if (err < 0) + int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE; + + err = alloc_ldt(new, size, 0); + if (err < 0) { + new->size = 0; return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); + } + for (i = 0; i < nr_pages; i++) + copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0); return 0; } @@ -96,6 +94,7 @@ int init_new_context(struct task_struct init_MUTEX(&mm->context.sem); mm->context.size = 0; + memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES); old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { down(&old_mm->context.sem); @@ -107,23 +106,21 @@ int init_new_context(struct task_struct /* * No need to lock the MM as we are the last user + * Do not touch the ldt register, we are already + * in the next thread. */ void destroy_context(struct mm_struct *mm) { - if (mm->context.size) { - if (mm == current->active_mm) - clear_LDT(); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } + int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; + + for (i = 0; i < nr_pages; i++) + __free_page(mm->context.ldt_pages[i]); + mm->context.size = 0; } static int read_ldt(void __user * ptr, unsigned long bytecount) { - int err; + int err, i; unsigned long size; struct mm_struct * mm = current->mm; @@ -138,8 +135,25 @@ static int read_ldt(void __user * ptr, u size = bytecount; err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; + /* + * This is necessary just in case we got here straight from a + * context-switch where the ptes were set but no tlb flush + * was done yet. We rather avoid doing a TLB flush in the + * context-switch path and do it here instead. + */ + __flush_tlb_global(); + + for (i = 0; i < size; i += PAGE_SIZE) { + int nr = i / PAGE_SIZE, bytes; + char *kaddr = kmap(mm->context.ldt_pages[nr]); + + bytes = size - i; + if (bytes > PAGE_SIZE) + bytes = PAGE_SIZE; + if (copy_to_user(ptr + i, kaddr, size - i)) + err = -EFAULT; + kunmap(mm->context.ldt_pages[nr]); + } up(&mm->context.sem); if (err < 0) return err; @@ -158,7 +172,7 @@ static int read_default_ldt(void __user err = 0; address = &default_ldt[0]; - size = 5*sizeof(struct desc_struct); + size = 5*LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -200,7 +214,15 @@ static int write_ldt(void __user * ptr, goto out_unlock; } - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); + /* + * No rescheduling allowed from this point to the install. + * + * We do a TLB flush for the same reason as in the read_ldt() path. + */ + preempt_disable(); + __flush_tlb_global(); + lp = (__u32 *) ((ldt_info.entry_number << 3) + + (char *) __kmap_atomic_vaddr(KM_LDT_PAGE0)); /* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { @@ -221,6 +243,7 @@ install: *lp = entry_1; *(lp+1) = entry_2; error = 0; + preempt_enable(); out_unlock: up(&mm->context.sem); @@ -248,3 +271,26 @@ asmlinkage int sys_modify_ldt(int func, } return ret; } + +/* + * load one particular LDT into the current CPU + */ +void load_LDT_nolock(mm_context_t *pc, int cpu) +{ + struct page **pages = pc->ldt_pages; + int count = pc->size; + int nr_pages, i; + + if (likely(!count)) { + pages = &default_ldt_page; + count = 5; + } + nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; + + for (i = 0; i < nr_pages; i++) { + __kunmap_atomic_type(KM_LDT_PAGE0 - i); + __kmap_atomic(pages[i], KM_LDT_PAGE0 - i); + } + set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count); + load_LDT_desc(); +} --- linux-2.6.2-rc1/arch/i386/kernel/Makefile 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/Makefile 2004-01-22 00:01:18.000000000 -0800 @@ -7,13 +7,14 @@ extra-y := head.o init_task.o vmlinux.ld obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - doublefault.o + doublefault.o entry_trampoline.o obj-y += cpu/ obj-y += timers/ obj-$(CONFIG_ACPI_BOOT) += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o +obj-$(CONFIG_KGDB) += kgdb_stub.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o --- linux-2.6.2-rc1/arch/i386/kernel/microcode.c 2003-10-25 14:45:44.000000000 -0700 +++ 25/arch/i386/kernel/microcode.c 2004-01-21 23:30:42.000000000 -0800 @@ -507,3 +507,4 @@ static void __exit microcode_exit (void) module_init(microcode_init) module_exit(microcode_exit) +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); --- linux-2.6.2-rc1/arch/i386/kernel/mpparse.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/mpparse.c 2004-01-22 00:01:18.000000000 -0800 @@ -668,7 +668,7 @@ void __init get_smp_config (void) * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc((void *)mpf->mpf_physptr)) { + if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) { smp_found_config = 0; printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); @@ -1081,8 +1081,14 @@ found: ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start; + /* + * MPS INTI flags: + * trigger: 0=default, 1=edge, 3=level + * polarity: 0=default, 1=high, 3=low + * Per ACPI spec, default for SCI means level/low. + */ io_apic_set_pci_routing(ioapic, ioapic_pin, irq, - (flags.trigger >> 1) , (flags.polarity >> 1)); + (flags.trigger == 1 ? 0 : 1), (flags.polarity == 1 ? 0 : 1)); } #ifdef CONFIG_ACPI_PCI @@ -1120,7 +1126,8 @@ void __init mp_parse_prt (void) /* Don't set up the ACPI SCI because it's already set up */ if (acpi_fadt.sci_int == irq) { - entry->irq = irq; /*we still need to set entry's irq*/ + irq = acpi_irq_to_vector(irq); + entry->irq = irq; /* we still need to set entry's irq */ continue; } @@ -1129,8 +1136,11 @@ void __init mp_parse_prt (void) continue; ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start; - if (!ioapic && (irq < 16)) - irq += 16; + if (es7000_plat) { + if (!ioapic && (irq < 16)) + irq += 16; + } + /* * Avoid pin reprogramming. PRTs typically include entries * with redundant pin->irq mappings (but unique PCI devices); @@ -1147,18 +1157,14 @@ void __init mp_parse_prt (void) if ((1<irq = irq; + entry->irq = acpi_irq_to_vector(irq); continue; } mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<irq = irq; + entry->irq = acpi_irq_to_vector(irq); } printk(KERN_DEBUG "%02x:%02x:%02x[%c] -> %d-%d -> IRQ %d\n", entry->id.segment, entry->id.bus, @@ -1166,6 +1172,10 @@ void __init mp_parse_prt (void) mp_ioapic_routing[ioapic].apic_id, ioapic_pin, entry->irq); } + + print_IO_APIC(); + + return; } #endif /*CONFIG_ACPI_PCI*/ --- linux-2.6.2-rc1/arch/i386/kernel/msr.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/i386/kernel/msr.c 2004-01-21 23:30:34.000000000 -0800 @@ -1,4 +1,3 @@ -#ident "$Id$" /* ----------------------------------------------------------------------- * * * Copyright 2000 H. Peter Anvin - All Rights Reserved --- linux-2.6.2-rc1/arch/i386/kernel/nmi.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/nmi.c 2004-01-21 23:26:58.000000000 -0800 @@ -31,7 +31,16 @@ #include #include +#ifdef CONFIG_KGDB +#include +#ifdef CONFIG_SMP +unsigned int nmi_watchdog = NMI_IO_APIC; +#else +unsigned int nmi_watchdog = NMI_LOCAL_APIC; +#endif +#else unsigned int nmi_watchdog = NMI_NONE; +#endif static unsigned int nmi_hz = HZ; unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ extern void show_registers(struct pt_regs *regs); @@ -408,6 +417,9 @@ void touch_nmi_watchdog (void) for (i = 0; i < NR_CPUS; i++) alert_counter[i] = 0; } +#ifdef CONFIG_KGDB +int tune_watchdog = 5*HZ; +#endif void nmi_watchdog_tick (struct pt_regs * regs) { @@ -421,12 +433,24 @@ void nmi_watchdog_tick (struct pt_regs * sum = irq_stat[cpu].apic_timer_irqs; +#ifdef CONFIG_KGDB + if (! in_kgdb(regs) && last_irq_sums[cpu] == sum ) { + +#else if (last_irq_sums[cpu] == sum) { +#endif /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; +#ifdef CONFIG_KGDB + if (alert_counter[cpu] == tune_watchdog) { + kgdb_handle_exception(2, SIGPWR, 0, regs); + last_irq_sums[cpu] = sum; + alert_counter[cpu] = 0; + } +#endif if (alert_counter[cpu] == 5*nmi_hz) { spin_lock(&nmi_print_lock); /* --- linux-2.6.2-rc1/arch/i386/kernel/process.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/process.c 2004-01-22 00:01:18.000000000 -0800 @@ -47,6 +47,7 @@ #include #include #include +#include #ifdef CONFIG_MATH_EMULATION #include #endif @@ -302,6 +303,9 @@ void flush_thread(void) struct task_struct *tsk = current; memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); +#ifdef CONFIG_X86_HIGH_ENTRY + clear_thread_flag(TIF_DB7); +#endif memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. @@ -315,9 +319,8 @@ void release_thread(struct task_struct * if (dead_task->mm) { // temporary debugging check if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", + printk("WARNING: dead process %8s still has LDT? <%d>\n", dead_task->comm, - dead_task->mm->context.ldt, dead_task->mm->context.size); BUG(); } @@ -352,7 +355,17 @@ int copy_thread(int nr, unsigned long cl p->thread.esp = (unsigned long) childregs; p->thread.esp0 = (unsigned long) (childregs+1); + /* + * get the two stack pages, for the virtual stack. + * + * IMPORTANT: this code relies on the fact that the task + * structure is an 8K aligned piece of physical memory. + */ + p->thread.stack_page0 = virt_to_page((unsigned long)p->thread_info); + p->thread.stack_page1 = virt_to_page((unsigned long)p->thread_info + PAGE_SIZE); + p->thread.eip = (unsigned long) ret_from_fork; + p->thread_info->real_stack = p->thread_info; savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); @@ -504,10 +517,41 @@ struct task_struct * __switch_to(struct __unlazy_fpu(prev_p); +#ifdef CONFIG_X86_HIGH_ENTRY + /* + * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is + * needed because otherwise NMIs could interrupt the + * user-return code with a virtual stack and stale TLBs.) + */ + __kunmap_atomic_type(KM_VSTACK0); + __kunmap_atomic_type(KM_VSTACK1); + __kmap_atomic(next->stack_page0, KM_VSTACK0); + __kmap_atomic(next->stack_page1, KM_VSTACK1); + + /* + * NOTE: here we rely on the task being the stack as well + */ + next_p->thread_info->virtual_stack = + (void *)__kmap_atomic_vaddr(KM_VSTACK0); + +#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) + /* + * If next was preempted on entry from userspace to kernel, + * and now it's on a different cpu, we need to adjust %esp. + * This assumes that entry.S does not copy %esp while on the + * virtual stack (with interrupts enabled): which is so, + * except within __SWITCH_KERNELSPACE itself. + */ + if (unlikely(next->esp >= TASK_SIZE)) { + next->esp &= THREAD_SIZE - 1; + next->esp |= (unsigned long) next_p->thread_info->virtual_stack; + } +#endif +#endif /* * Reload esp0, LDT and the page table pointer: */ - load_esp0(tss, next); + load_virtual_esp0(tss, next_p); /* * Load the per-thread Thread-Local Storage descriptor. --- linux-2.6.2-rc1/arch/i386/kernel/reboot.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/reboot.c 2004-01-22 00:01:18.000000000 -0800 @@ -155,12 +155,11 @@ void machine_real_restart(unsigned char CMOS_WRITE(0x00, 0x8f); spin_unlock_irqrestore(&rtc_lock, flags); - /* Remap the kernel at virtual address zero, as well as offset zero - from the kernel segment. This assumes the kernel segment starts at - virtual address PAGE_OFFSET. */ - - memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); + /* + * Remap the first 16 MB of RAM (which includes the kernel image) + * at virtual address zero: + */ + setup_identity_mappings(swapper_pg_dir, 0, 16*1024*1024); /* * Use `swapper_pg_dir' as our page directory. --- linux-2.6.2-rc1/arch/i386/kernel/setup.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/setup.c 2004-01-21 23:30:45.000000000 -0800 @@ -78,13 +78,10 @@ EXPORT_SYMBOL_GPL(mmu_cr4_features); EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_ACPI_BOOT - int acpi_irq __initdata = 1; /* enable IRQ */ - int acpi_ht __initdata = 1; /* enable HT */ +extern int __initdata acpi_ht; +int __initdata acpi_force = 0; #endif -int acpi_force __initdata = 0; - - int MCA_bus; /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; @@ -447,6 +444,12 @@ static int __init copy_e820_map(struct e #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) unsigned char eddnr; struct edd_info edd[EDDMAXNR]; +unsigned int edd_disk80_sig; +#ifdef CONFIG_EDD_MODULE +EXPORT_SYMBOL(eddnr); +EXPORT_SYMBOL(edd); +EXPORT_SYMBOL(edd_disk80_sig); +#endif /** * copy_edd() - Copy the BIOS EDD information * from empty_zero_page into a safe place. @@ -456,6 +459,7 @@ static inline void copy_edd(void) { eddnr = EDD_NR; memcpy(edd, EDD_BUF, sizeof(edd)); + edd_disk80_sig = DISK80_SIGNATURE; } #else #define copy_edd() do {} while (0) @@ -573,7 +577,7 @@ static void __init parse_cmdline_early ( /* "pci=noacpi" disables ACPI interrupt routing */ else if (!memcmp(from, "pci=noacpi", 10)) { - acpi_irq = 0; + acpi_noirq_set(); } #ifdef CONFIG_X86_LOCAL_APIC --- linux-2.6.2-rc1/arch/i386/kernel/signal.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/i386/kernel/signal.c 2004-01-22 00:01:18.000000000 -0800 @@ -128,28 +128,29 @@ sys_sigaltstack(const stack_t __user *us */ static int -restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax) +restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *__sc, int *peax) { - unsigned int err = 0; + struct sigcontext scratch; /* 88 bytes of scratch area */ /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#define COPY(x) err |= __get_user(regs->x, &sc->x) + if (copy_from_user(&scratch, __sc, sizeof(scratch))) + return -EFAULT; + +#define COPY(x) regs->x = scratch.x #define COPY_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + { unsigned short tmp = scratch.seg; \ regs->x##seg = tmp; } #define COPY_SEG_STRICT(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + { unsigned short tmp = scratch.seg; \ regs->x##seg = tmp|3; } #define GET_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + { unsigned short tmp = scratch.seg; \ loadsegment(seg,tmp); } GET_SEG(gs); @@ -168,27 +169,23 @@ restore_sigcontext(struct pt_regs *regs, COPY_SEG_STRICT(ss); { - unsigned int tmpflags; - err |= __get_user(tmpflags, &sc->eflags); + unsigned int tmpflags = scratch.eflags; regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); regs->orig_eax = -1; /* disable syscall checks */ } { - struct _fpstate __user * buf; - err |= __get_user(buf, &sc->fpstate); + struct _fpstate * buf = scratch.fpstate; if (buf) { if (verify_area(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); + return -EFAULT; + if (restore_i387(buf)) + return -EFAULT; } } - err |= __get_user(*peax, &sc->eax); - return err; - -badframe: - return 1; + *peax = scratch.eax; + return 0; } asmlinkage int sys_sigreturn(unsigned long __unused) @@ -266,46 +263,47 @@ badframe: */ static int -setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, +setup_sigcontext(struct sigcontext __user *__sc, struct _fpstate __user *fpstate, struct pt_regs *regs, unsigned long mask) { - int tmp, err = 0; + struct sigcontext sc; /* 88 bytes of scratch area */ + int tmp; tmp = 0; __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); - err |= __put_user(tmp, (unsigned int *)&sc->gs); + *(unsigned int *)&sc.gs = tmp; __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); - err |= __put_user(tmp, (unsigned int *)&sc->fs); - - err |= __put_user(regs->xes, (unsigned int *)&sc->es); - err |= __put_user(regs->xds, (unsigned int *)&sc->ds); - err |= __put_user(regs->edi, &sc->edi); - err |= __put_user(regs->esi, &sc->esi); - err |= __put_user(regs->ebp, &sc->ebp); - err |= __put_user(regs->esp, &sc->esp); - err |= __put_user(regs->ebx, &sc->ebx); - err |= __put_user(regs->edx, &sc->edx); - err |= __put_user(regs->ecx, &sc->ecx); - err |= __put_user(regs->eax, &sc->eax); - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->eip, &sc->eip); - err |= __put_user(regs->xcs, (unsigned int *)&sc->cs); - err |= __put_user(regs->eflags, &sc->eflags); - err |= __put_user(regs->esp, &sc->esp_at_signal); - err |= __put_user(regs->xss, (unsigned int *)&sc->ss); + *(unsigned int *)&sc.fs = tmp; + *(unsigned int *)&sc.es = regs->xes; + *(unsigned int *)&sc.ds = regs->xds; + sc.edi = regs->edi; + sc.esi = regs->esi; + sc.ebp = regs->ebp; + sc.esp = regs->esp; + sc.ebx = regs->ebx; + sc.edx = regs->edx; + sc.ecx = regs->ecx; + sc.eax = regs->eax; + sc.trapno = current->thread.trap_no; + sc.err = current->thread.error_code; + sc.eip = regs->eip; + *(unsigned int *)&sc.cs = regs->xcs; + sc.eflags = regs->eflags; + sc.esp_at_signal = regs->esp; + *(unsigned int *)&sc.ss = regs->xss; tmp = save_i387(fpstate); if (tmp < 0) - err = 1; - else - err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); + return 1; + sc.fpstate = tmp ? fpstate : NULL; /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); + sc.oldmask = mask; + sc.cr2 = current->thread.cr2; - return err; + if (copy_to_user(__sc, &sc, sizeof(sc))) + return 1; + return 0; } /* @@ -443,7 +441,7 @@ static void setup_rt_frame(int sig, stru /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(current->sas_ss_sp, (unsigned long *)&frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->esp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); --- linux-2.6.2-rc1/arch/i386/kernel/smpboot.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/i386/kernel/smpboot.c 2004-01-21 23:27:17.000000000 -0800 @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -932,7 +933,7 @@ static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; -int cpu_sibling_map[NR_CPUS] __cacheline_aligned; +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; static void __init smp_boot_cpus(unsigned int max_cpus) { @@ -1078,32 +1079,34 @@ static void __init smp_boot_cpus(unsigne Dprintk("Boot done.\n"); /* - * If Hyper-Threading is avaialble, construct cpu_sibling_map[], so - * that we can tell the sibling CPU efficiently. + * construct cpu_sibling_map[], so that we can tell sibling CPUs + * efficiently. */ - if (cpu_has_ht && smp_num_siblings > 1) { - for (cpu = 0; cpu < NR_CPUS; cpu++) - cpu_sibling_map[cpu] = NO_PROC_ID; - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - int i; - if (!cpu_isset(cpu, cpu_callout_map)) - continue; + for (cpu = 0; cpu < NR_CPUS; cpu++) + cpu_sibling_map[cpu] = CPU_MASK_NONE; + for (cpu = 0; cpu < NR_CPUS; cpu++) { + int siblings = 0; + int i; + if (!cpu_isset(cpu, cpu_callout_map)) + continue; + + if (smp_num_siblings > 1) { for (i = 0; i < NR_CPUS; i++) { - if (i == cpu || !cpu_isset(i, cpu_callout_map)) + if (!cpu_isset(i, cpu_callout_map)) continue; if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_sibling_map[cpu] = i; - printk("cpu_sibling_map[%d] = %d\n", cpu, cpu_sibling_map[cpu]); - break; + siblings++; + cpu_set(i, cpu_sibling_map[cpu]); } } - if (cpu_sibling_map[cpu] == NO_PROC_ID) { - smp_num_siblings = 1; - printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu); - } + } else { + siblings++; + cpu_set(cpu, cpu_sibling_map[cpu]); } + + if (siblings != smp_num_siblings) + printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); } smpboot_setup_io_apic(); @@ -1117,6 +1120,220 @@ static void __init smp_boot_cpus(unsigne synchronize_tsc_bp(); } +#ifdef CONFIG_SCHED_SMT +#ifdef CONFIG_NUMA +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static struct sched_group sched_group_nodes[MAX_NUMNODES]; +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static DEFINE_PER_CPU(struct sched_domain, node_domains); +__init void arch_init_sched_domains(void) +{ + int i; + cpumask_t all_cpus = CPU_MASK_NONE; + struct sched_group *first_cpu = NULL, *last_cpu = NULL; + + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_possible(i)) + continue; + + cpu_set(i, all_cpus); + } + + /* Set up domains */ + for_each_cpu_mask(i, all_cpus) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_domain *node_domain = &per_cpu(node_domains, i); + int node = cpu_to_node(i); + cpumask_t nodemask = node_to_cpumask(node); + + *cpu_domain = SD_SIBLING_INIT; + cpu_domain->span = cpu_sibling_map[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = nodemask; + phys_domain->flags |= SD_FLAG_IDLE; + + *node_domain = SD_NODE_INIT; + node_domain->span = all_cpus; + } + + /* Set up CPU (sibling) groups */ + for_each_cpu_mask(i, all_cpus) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + int j; + first_cpu = last_cpu = NULL; + + if (i != first_cpu(cpu_domain->span)) + continue; + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpu->cpumask = CPU_MASK_NONE; + cpu_set(j, cpu->cpumask); + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + } + + for (i = 0; i < MAX_NUMNODES; i++) { + int j; + cpumask_t nodemask; + cpus_and(nodemask, node_to_cpumask(i), all_cpus); + + first_cpu = last_cpu = NULL; + /* Set up physical groups */ + for_each_cpu_mask(j, nodemask) { + struct sched_domain *cpu_domain = cpu_sched_domain(j); + struct sched_group *cpu = &sched_group_phys[j]; + + if (j != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + } + + /* Set up nodes */ + first_cpu = last_cpu = NULL; + for (i = 0; i < MAX_NUMNODES; i++) { + struct sched_group *cpu = &sched_group_nodes[i]; + cpumask_t nodemask; + cpus_and(nodemask, node_to_cpumask(i), all_cpus); + + if (cpus_empty(nodemask)) + continue; + + cpu->cpumask = nodemask; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + + + mb(); + for_each_cpu_mask(i, all_cpus) { + int node = cpu_to_node(i); + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_domain *node_domain = &per_cpu(node_domains, i); + struct sched_group *cpu_group = &sched_group_cpus[i]; + struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)]; + struct sched_group *node_group = &sched_group_nodes[node]; + + cpu_domain->parent = phys_domain; + phys_domain->parent = node_domain; + + node_domain->groups = node_group; + phys_domain->groups = phys_group; + cpu_domain->groups = cpu_group; + } +} +#else /* CONFIG_NUMA */ +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +__init void arch_init_sched_domains(void) +{ + int i; + cpumask_t all_cpus = CPU_MASK_NONE; + struct sched_group *first_cpu = NULL, *last_cpu = NULL; + + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_possible(i)) + continue; + + cpu_set(i, all_cpus); + } + + /* Set up domains */ + for_each_cpu_mask(i, all_cpus) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + + *cpu_domain = SD_SIBLING_INIT; + cpu_domain->span = cpu_sibling_map[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = all_cpus; + phys_domain->flags |= SD_FLAG_IDLE; + } + + /* Set up CPU (sibling) groups */ + for_each_cpu_mask(i, all_cpus) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + int j; + first_cpu = last_cpu = NULL; + + if (i != first_cpu(cpu_domain->span)) + continue; + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpu->cpumask = CPU_MASK_NONE; + cpu_set(j, cpu->cpumask); + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + } + + first_cpu = last_cpu = NULL; + /* Set up physical groups */ + for_each_cpu_mask(i, all_cpus) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_group *cpu = &sched_group_phys[i]; + + if (i != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + + mb(); + for_each_cpu_mask(i, all_cpus) { + struct sched_domain *cpu_domain = cpu_sched_domain(i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_group *cpu_group = &sched_group_cpus[i]; + struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)]; + cpu_domain->parent = phys_domain; + phys_domain->groups = phys_group; + cpu_domain->groups = cpu_group; + } +} +#endif /* CONFIG_NUMA */ +#endif /* CONFIG_SCHED_SMT */ + /* These are wrappers to interface to the new boot process. Someone who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ void __init smp_prepare_cpus(unsigned int max_cpus) --- linux-2.6.2-rc1/arch/i386/kernel/smp.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/smp.c 2004-01-22 00:01:18.000000000 -0800 @@ -327,10 +327,12 @@ asmlinkage void smp_invalidate_interrupt if (flush_mm == cpu_tlbstate[cpu].active_mm) { if (cpu_tlbstate[cpu].state == TLBSTATE_OK) { +#ifndef CONFIG_X86_SWITCH_PAGETABLES if (flush_va == FLUSH_ALL) local_flush_tlb(); else __flush_tlb_one(flush_va); +#endif } else leave_mm(cpu); } @@ -396,21 +398,6 @@ static void flush_tlb_others(cpumask_t c spin_unlock(&tlbstate_lock); } -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); - preempt_enable(); -} - void flush_tlb_mm (struct mm_struct * mm) { cpumask_t cpu_mask; @@ -442,7 +429,10 @@ void flush_tlb_page(struct vm_area_struc if (current->active_mm == mm) { if(current->mm) - __flush_tlb_one(va); +#ifndef CONFIG_X86_SWITCH_PAGETABLES + __flush_tlb_one(va) +#endif + ; else leave_mm(smp_processor_id()); } @@ -466,7 +456,17 @@ void flush_tlb_all(void) { on_each_cpu(do_flush_tlb_all, 0, 1, 1); } - +#ifdef CONFIG_KGDB +/* + * By using the NMI code instead of a vector we just sneak thru the + * word generator coming out with just what we want. AND it does + * not matter if clustered_apic_mode is set or not. + */ +void smp_send_nmi_allbutself(void) +{ + send_IPI_allbutself(APIC_DM_NMI); +} +#endif /* * this function sends a 'reschedule' IPI to another CPU. * it goes straight through and wastes no time serializing --- linux-2.6.2-rc1/arch/i386/kernel/sysenter.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/sysenter.c 2004-01-22 00:01:18.000000000 -0800 @@ -18,13 +18,18 @@ #include #include #include +#include extern asmlinkage void sysenter_entry(void); void enable_sep_cpu(void *info) { int cpu = get_cpu(); +#ifdef CONFIG_X86_HIGH_ENTRY + struct tss_struct *tss = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu; +#else struct tss_struct *tss = init_tss + cpu; +#endif tss->ss1 = __KERNEL_CS; tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; --- linux-2.6.2-rc1/arch/i386/kernel/timers/common.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/timers/common.c 2004-01-21 23:27:12.000000000 -0800 @@ -137,3 +137,23 @@ bad_calibration: } #endif +/* calculate cpu_khz */ +void __init init_cpu_khz(void) +{ + if (cpu_has_tsc) { + unsigned long tsc_quotient = calibrate_tsc(); + if (tsc_quotient) { + /* report CPU clock rate in Hz. + * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = + * clock/second. Our precision is about 100 ppm. + */ + { unsigned long eax=0, edx=1000; + __asm__("divl %2" + :"=a" (cpu_khz), "=d" (edx) + :"r" (tsc_quotient), + "0" (eax), "1" (edx)); + printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + } + } + } +} --- linux-2.6.2-rc1/arch/i386/kernel/timers/Makefile 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/i386/kernel/timers/Makefile 2004-01-21 23:27:12.000000000 -0800 @@ -6,3 +6,4 @@ obj-y := timer.o timer_none.o timer_tsc. obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o obj-$(CONFIG_HPET_TIMER) += timer_hpet.o +obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o --- linux-2.6.2-rc1/arch/i386/kernel/timers/timer.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/i386/kernel/timers/timer.c 2004-01-21 23:27:12.000000000 -0800 @@ -19,6 +19,9 @@ static struct timer_opts* timers[] = { #ifdef CONFIG_HPET_TIMER &timer_hpet, #endif +#ifdef CONFIG_X86_PM_TIMER + &timer_pmtmr, +#endif &timer_tsc, &timer_pit, NULL, --- linux-2.6.2-rc1/arch/i386/kernel/timers/timer_cyclone.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/timers/timer_cyclone.c 2004-01-21 23:27:12.000000000 -0800 @@ -212,26 +212,7 @@ static int __init init_cyclone(char* ove } } - /* init cpu_khz. - * XXX - This should really be done elsewhere, - * and in a more generic fashion. -johnstul@us.ibm.com - */ - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc(); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. - */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); - } - } - } + init_cpu_khz(); /* Everything looks good! */ return 0; --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/kernel/timers/timer_pm.c 2004-01-21 23:27:12.000000000 -0800 @@ -0,0 +1,217 @@ +/* + * (C) Dominik Brodowski 2003 + * + * Driver to use the Power Management Timer (PMTMR) available in some + * southbridges as primary timing source for the Linux kernel. + * + * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, + * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. + * + * This file is licensed under the GPL v2. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* The I/O port the PMTMR resides at. + * The location is detected during setup_arch(), + * in arch/i386/acpi/boot.c */ +u32 pmtmr_ioport = 0; + + +/* value of the Power timer at last timer interrupt */ +static u32 offset_tick; +static u32 offset_delay; + +static unsigned long long monotonic_base; +static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; + +#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ + +/*helper function to safely read acpi pm timesource*/ +static inline u32 read_pmtmr(void) +{ + u32 v1=0,v2=0,v3=0; + /* It has been reported that because of various broken + * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time + * source is not latched, so you must read it multiple + * times to insure a safe value is read. + */ + do { + v1 = inl(pmtmr_ioport); + v2 = inl(pmtmr_ioport); + v3 = inl(pmtmr_ioport); + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + /* mask the output to 24 bits */ + return v2 & ACPI_PM_MASK; +} + +static int init_pmtmr(char* override) +{ + u32 value1, value2; + unsigned int i; + + if (override[0] && strncmp(override,"pmtmr",5)) + return -ENODEV; + + if (!pmtmr_ioport) + return -ENODEV; + + /* "verify" this timing source */ + value1 = read_pmtmr(); + for (i = 0; i < 10000; i++) { + value2 = read_pmtmr(); + if (value2 == value1) + continue; + if (value2 > value1) + goto pm_good; + if ((value2 < value1) && ((value2) < 0xFFF)) + goto pm_good; + printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); + return -EINVAL; + } + printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); + return -ENODEV; + +pm_good: + init_cpu_khz(); + return 0; +} + +static inline u32 cyc2us(u32 cycles) +{ + /* The Power Management Timer ticks at 3.579545 ticks per microsecond. + * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] + * + * Even with HZ = 100, delta is at maximum 35796 ticks, so it can + * easily be multiplied with 286 (=0x11E) without having to fear + * u32 overflows. + */ + cycles *= 286; + return (cycles >> 10); +} + +/* + * this gets called during each timer interrupt + * - Called while holding the writer xtime_lock + */ +static void mark_offset_pmtmr(void) +{ + u32 lost, delta, last_offset; + static int first_run = 1; + last_offset = offset_tick; + + write_seqlock(&monotonic_lock); + + offset_tick = read_pmtmr(); + + /* calculate tick interval */ + delta = (offset_tick - last_offset) & ACPI_PM_MASK; + + /* convert to usecs */ + delta = cyc2us(delta); + + /* update the monotonic base value */ + monotonic_base += delta * NSEC_PER_USEC; + write_sequnlock(&monotonic_lock); + + /* convert to ticks */ + delta += offset_delay; + lost = delta / (USEC_PER_SEC / HZ); + offset_delay = delta % (USEC_PER_SEC / HZ); + + + /* compensate for lost ticks */ + if (lost >= 2) + jiffies_64 += lost - 1; + + /* don't calculate delay for first run, + or if we've got less then a tick */ + if (first_run || (lost < 1)) { + first_run = 0; + offset_delay = 0; + } +} + + +static unsigned long long monotonic_clock_pmtmr(void) +{ + u32 last_offset, this_offset; + unsigned long long base, ret; + unsigned seq; + + + /* atomically read monotonic base & last_offset */ + do { + seq = read_seqbegin(&monotonic_lock); + last_offset = offset_tick; + base = monotonic_base; + } while (read_seqretry(&monotonic_lock, seq)); + + /* Read the pmtmr */ + this_offset = read_pmtmr(); + + /* convert to nanoseconds */ + ret = (this_offset - last_offset) & ACPI_PM_MASK; + ret = base + (cyc2us(ret) * NSEC_PER_USEC); + return ret; +} + +/* + * copied from delay_pit + */ +static void delay_pmtmr(unsigned long loops) +{ + int d0; + __asm__ __volatile__( + "\tjmp 1f\n" + ".align 16\n" + "1:\tjmp 2f\n" + ".align 16\n" + "2:\tdecl %0\n\tjns 2b" + :"=&a" (d0) + :"0" (loops)); +} + + +/* + * get the offset (in microseconds) from the last call to mark_offset() + * - Called holding a reader xtime_lock + */ +static unsigned long get_offset_pmtmr(void) +{ + u32 now, offset, delta = 0; + + offset = offset_tick; + now = read_pmtmr(); + delta = (now - offset)&ACPI_PM_MASK; + + return (unsigned long) offset_delay + cyc2us(delta); +} + + +/* acpi timer_opts struct */ +struct timer_opts timer_pmtmr = { + .name = "pmtmr", + .init = init_pmtmr, + .mark_offset = mark_offset_pmtmr, + .get_offset = get_offset_pmtmr, + .monotonic_clock = monotonic_clock_pmtmr, + .delay = delay_pmtmr, +}; + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dominik Brodowski "); +MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); --- linux-2.6.2-rc1/arch/i386/kernel/traps.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/traps.c 2004-01-22 00:01:18.000000000 -0800 @@ -54,12 +54,8 @@ #include "mach_traps.h" -asmlinkage int system_call(void); -asmlinkage void lcall7(void); -asmlinkage void lcall27(void); - -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 } }; +struct desc_struct default_ldt[] __attribute__((__section__(".data.default_ldt"))) = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }; +struct page *default_ldt_page; /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq = 0; @@ -91,6 +87,43 @@ asmlinkage void alignment_check(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); +#ifdef CONFIG_KGDB +extern void sysenter_entry(void); +#include +#include +extern void int3(void); +extern void debug(void); +void set_intr_gate(unsigned int n, void *addr); +static void set_intr_usr_gate(unsigned int n, void *addr); +/* + * Should be able to call this breakpoint() very early in + * bring up. Just hard code the call where needed. + * The breakpoint() code is here because set_?_gate() functions + * are local (static) to trap.c. They need be done only once, + * but it does not hurt to do them over. + */ +void breakpoint(void) +{ + init_entry_mappings(); + set_intr_usr_gate(3,&int3); /* disable ints on trap */ + set_intr_gate(1,&debug); + set_intr_gate(14,&page_fault); + + BREAKPOINT; +} +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ + { \ + if (!user_mode(regs) ) \ + { \ + kgdb_handle_exception(trapnr, signr, error_code, regs); \ + after; \ + } else if ((trapnr == 3) && (regs->eflags &0x200)) local_irq_enable(); \ + } +#else +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) +#endif + + static int kstack_depth_to_print = 24; void show_trace(struct task_struct *task, unsigned long * stack) @@ -175,8 +208,9 @@ void show_registers(struct pt_regs *regs ss = regs->xss & 0xffff; } print_modules(); - printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx\n", - smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags); + printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx\n", + smp_processor_id(), 0xffff & regs->xcs, + regs->eip, print_tainted(), regs->eflags); print_symbol("EIP is at %s\n", regs->eip); printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", @@ -192,23 +226,27 @@ void show_registers(struct pt_regs *regs * time of the fault.. */ if (in_kernel) { + u8 *eip; printk("\nStack: "); show_stack(NULL, (unsigned long*)esp); printk("Code: "); - if(regs->eip < PAGE_OFFSET) - goto bad; - for(i=0;i<20;i++) - { - unsigned char c; - if(__get_user(c, &((unsigned char*)regs->eip)[i])) { -bad: + eip = (u8 *)regs->eip - 43; + for (i = 0; i < 64; i++, eip++) { + unsigned char c = 0xff; + + if ((user_mode(regs) && get_user(c, eip)) || + (!user_mode(regs) && __direct_get_user(c, eip))) { + printk(" Bad EIP value."); break; } - printk("%02x ", c); + if (eip == (u8 *)regs->eip) + printk("<%02x> ", c); + else + printk("%02x ", c); } } printk("\n"); @@ -255,12 +293,36 @@ spinlock_t die_lock = SPIN_LOCK_UNLOCKED void die(const char * str, struct pt_regs * regs, long err) { static int die_counter; + int nl = 0; console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); handle_BUG(regs); printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); + nl = 1; +#endif +#ifdef CONFIG_SMP + printk("SMP "); + nl = 1; +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); + nl = 1; +#endif + if (nl) + printk("\n"); +#ifdef CONFIG_KGDB + /* This is about the only place we want to go to kgdb even if in + * user mode. But we must go in via a trap so within kgdb we will + * always be in kernel mode. + */ + if (user_mode(regs)) + BREAKPOINT; +#endif + CHK_REMOTE_DEBUG(0,SIGTRAP,err,regs,) show_registers(regs); bust_spinlocks(0); spin_unlock_irq(&die_lock); @@ -330,6 +392,7 @@ static inline void do_trap(int trapnr, i #define DO_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,)\ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } @@ -347,7 +410,9 @@ asmlinkage void do_##name(struct pt_regs #define DO_VM86_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + CHK_REMOTE_DEBUG(trapnr, signr, error_code,regs, return)\ do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ + return; \ } #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ @@ -394,8 +459,10 @@ gp_in_vm86: return; gp_in_kernel: - if (!fixup_exception(regs)) + if (!fixup_exception(regs)){ + CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,) die("general protection fault", regs, error_code); + } } static void mem_parity_error(unsigned char reason, struct pt_regs * regs) @@ -534,10 +601,18 @@ asmlinkage void do_debug(struct pt_regs if (regs->eflags & X86_EFLAGS_IF) local_irq_enable(); - /* Mask out spurious debug traps due to lazy DR7 setting */ + /* + * Mask out spurious debug traps due to lazy DR7 setting or + * due to 4G/4G kernel mode: + */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { if (!tsk->thread.debugreg[7]) goto clear_dr7; + if (!user_mode(regs)) { + // restore upon return-to-userspace: + set_thread_flag(TIF_DB7); + goto clear_dr7; + } } if (regs->eflags & VM_MASK) @@ -557,8 +632,18 @@ asmlinkage void do_debug(struct pt_regs * allowing programs to debug themselves without the ptrace() * interface. */ +#ifdef CONFIG_KGDB + /* + * I think this is the only "real" case of a TF in the kernel + * that really belongs to user space. Others are + * "Ours all ours!" + */ + if (((regs->xcs & 3) == 0) && ((void *)regs->eip == sysenter_entry)) + goto clear_TF_reenable; +#else if ((regs->xcs & 3) == 0) goto clear_TF_reenable; +#endif if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) goto clear_TF; } @@ -570,6 +655,17 @@ asmlinkage void do_debug(struct pt_regs info.si_errno = 0; info.si_code = TRAP_BRKPT; +#ifdef CONFIG_KGDB + /* + * If this is a kernel mode trap, we need to reset db7 to allow us + * to continue sanely ALSO skip the signal delivery + */ + if ((regs->xcs & 3) == 0) + goto clear_dr7; + + /* if not kernel, allow ints but only if they were on */ + if ( regs->eflags & 0x200) local_irq_enable(); +#endif /* If this is a kernel mode trap, save the user PC on entry to * the kernel, that's what the debugger can make sense of. */ @@ -584,6 +680,7 @@ clear_dr7: __asm__("movl %0,%%db7" : /* no output */ : "r" (0)); + CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,) return; debug_vm86: @@ -779,19 +876,53 @@ asmlinkage void math_emulate(long arg) #endif /* CONFIG_MATH_EMULATION */ -#ifdef CONFIG_X86_F00F_BUG -void __init trap_init_f00f_bug(void) +void __init trap_init_virtual_IDT(void) { - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. + * "idt" is magic - it overlaps the idt_descr + * variable so that updating idt will automatically + * update the idt descriptor.. */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); + __set_fixmap(FIX_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + idt_descr.address = __fix_to_virt(FIX_IDT); + __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); } + +void __init trap_init_virtual_GDT(void) +{ + int cpu = smp_processor_id(); + struct Xgt_desc_struct *gdt_desc = cpu_gdt_descr + cpu; + struct Xgt_desc_struct tmp_desc = {0, 0}; + struct tss_struct * t; + + __asm__ __volatile__("sgdt %0": "=m" (tmp_desc): :"memory"); + +#ifdef CONFIG_X86_HIGH_ENTRY + if (!cpu) { + __set_fixmap(FIX_GDT_0, __pa(cpu_gdt_table), PAGE_KERNEL); + __set_fixmap(FIX_GDT_1, __pa(cpu_gdt_table) + PAGE_SIZE, PAGE_KERNEL); + __set_fixmap(FIX_TSS_0, __pa(init_tss), PAGE_KERNEL); + __set_fixmap(FIX_TSS_1, __pa(init_tss) + 1*PAGE_SIZE, PAGE_KERNEL); + __set_fixmap(FIX_TSS_2, __pa(init_tss) + 2*PAGE_SIZE, PAGE_KERNEL); + __set_fixmap(FIX_TSS_3, __pa(init_tss) + 3*PAGE_SIZE, PAGE_KERNEL); + } + + gdt_desc->address = __fix_to_virt(FIX_GDT_0) + sizeof(cpu_gdt_table[0]) * cpu; +#else + gdt_desc->address = (unsigned long)cpu_gdt_table[cpu]; +#endif + __asm__ __volatile__("lgdt %0": "=m" (*gdt_desc)); + +#ifdef CONFIG_X86_HIGH_ENTRY + t = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu; +#else + t = init_tss + cpu; #endif + set_tss_desc(cpu, t); + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; + load_TR_desc(); +} #define _set_gate(gate_addr,type,dpl,addr,seg) \ do { \ @@ -818,20 +949,26 @@ void set_intr_gate(unsigned int n, void _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); } -static void __init set_trap_gate(unsigned int n, void *addr) +void __init set_trap_gate(unsigned int n, void *addr) { _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); } -static void __init set_system_gate(unsigned int n, void *addr) +void __init set_system_gate(unsigned int n, void *addr) { _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); } -static void __init set_call_gate(void *a, void *addr) +void __init set_call_gate(void *a, void *addr) { _set_gate(a,12,3,addr,__KERNEL_CS); } +#ifdef CONFIG_KGDB +void set_intr_usr_gate(unsigned int n, void *addr) +{ + _set_gate(idt_table+n,14,3,addr,__KERNEL_CS); +} +#endif static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) { @@ -850,11 +987,16 @@ void __init trap_init(void) #ifdef CONFIG_X86_LOCAL_APIC init_apic_mappings(); #endif + init_entry_mappings(); set_trap_gate(0,÷_error); set_intr_gate(1,&debug); set_intr_gate(2,&nmi); +#ifndef CONFIG_KGDB set_system_gate(3,&int3); /* int3-5 can be called from all */ +#else + set_intr_usr_gate(3,&int3); /* int3-5 can be called from all */ +#endif set_system_gate(4,&overflow); set_system_gate(5,&bounds); set_trap_gate(6,&invalid_op); --- linux-2.6.2-rc1/arch/i386/kernel/vm86.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/vm86.c 2004-01-22 00:01:18.000000000 -0800 @@ -125,7 +125,7 @@ struct pt_regs * save_v86_state(struct k tss = init_tss + get_cpu(); current->thread.esp0 = current->thread.saved_esp0; current->thread.sysenter_cs = __KERNEL_CS; - load_esp0(tss, ¤t->thread); + load_virtual_esp0(tss, current); current->thread.saved_esp0 = 0; put_cpu(); @@ -305,7 +305,7 @@ static void do_sys_vm86(struct kernel_vm tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; - load_esp0(tss, &tsk->thread); + load_virtual_esp0(tss, tsk); put_cpu(); tsk->thread.screen_bitmap = info->screen_bitmap; --- linux-2.6.2-rc1/arch/i386/kernel/vmlinux.lds.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/vmlinux.lds.S 2004-01-22 00:01:18.000000000 -0800 @@ -3,6 +3,9 @@ */ #include +#include +#include +#include OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -10,7 +13,7 @@ ENTRY(startup_32) jiffies = jiffies_64; SECTIONS { - . = 0xC0000000 + 0x100000; + . = __PAGE_OFFSET + 0x100000; /* read-only */ _text = .; /* Text and read-only data */ .text : { @@ -19,6 +22,19 @@ SECTIONS *(.gnu.warning) } = 0x9090 +#ifdef CONFIG_X86_4G + . = ALIGN(PAGE_SIZE_asm); + __entry_tramp_start = .; + . = FIX_ENTRY_TRAMPOLINE_0_addr; + __start___entry_text = .; + .entry.text : AT (__entry_tramp_start) { *(.entry.text) } + __entry_tramp_end = __entry_tramp_start + SIZEOF(.entry.text); + . = __entry_tramp_end; + . = ALIGN(PAGE_SIZE_asm); +#else + .entry.text : { *(.entry.text) } +#endif + _etext = .; /* End of text section */ . = ALIGN(16); /* Exception table */ @@ -34,15 +50,12 @@ SECTIONS CONSTRUCTORS } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE_asm); __nosave_begin = .; .data_nosave : { *(.data.nosave) } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE_asm); __nosave_end = .; - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - . = ALIGN(32); .data.cacheline_aligned : { *(.data.cacheline_aligned) } @@ -52,7 +65,7 @@ SECTIONS .data.init_task : { *(.data.init_task) } /* will be freed after init */ - . = ALIGN(4096); /* Init code and data */ + . = ALIGN(PAGE_SIZE_asm); /* Init code and data */ __init_begin = .; .init.text : { _sinittext = .; @@ -91,7 +104,7 @@ SECTIONS from .altinstructions and .eh_frame */ .exit.text : { *(.exit.text) } .exit.data : { *(.exit.data) } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE_asm); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; @@ -99,10 +112,22 @@ SECTIONS __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE_asm); __init_end = .; /* freed after init ends here */ - + + . = ALIGN(PAGE_SIZE_asm); + .data.page_aligned_tss : { *(.data.tss) } + + . = ALIGN(PAGE_SIZE_asm); + .data.page_aligned_default_ldt : { *(.data.default_ldt) } + + . = ALIGN(PAGE_SIZE_asm); + .data.page_aligned_idt : { *(.data.idt) } + + . = ALIGN(PAGE_SIZE_asm); + .data.page_aligned_gdt : { *(.data.gdt) } + __bss_start = .; /* BSS */ .bss : { *(.bss) } __bss_stop = .; @@ -122,4 +147,6 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + } --- linux-2.6.2-rc1/arch/i386/kernel/vsyscall.lds 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/vsyscall.lds 2004-01-22 00:01:18.000000000 -0800 @@ -5,7 +5,7 @@ */ /* This must match . */ -VSYSCALL_BASE = 0xffffe000; +VSYSCALL_BASE = 0xffffd000; SECTIONS { --- linux-2.6.2-rc1/arch/i386/kernel/vsyscall-sysenter.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/vsyscall-sysenter.S 2004-01-22 00:01:18.000000000 -0800 @@ -7,6 +7,11 @@ .type __kernel_vsyscall,@function __kernel_vsyscall: .LSTART_vsyscall: + cmpl $192, %eax + jne 1f + int $0x80 + ret +1: push %ecx .Lpush_ecx: push %edx --- linux-2.6.2-rc1/arch/i386/lib/checksum.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/checksum.S 2004-01-22 00:01:18.000000000 -0800 @@ -280,14 +280,14 @@ unsigned int csum_partial_copy_generic ( .previous .align 4 -.globl csum_partial_copy_generic +.globl direct_csum_partial_copy_generic #ifndef CONFIG_X86_USE_PPRO_CHECKSUM #define ARGBASE 16 #define FP 12 -csum_partial_copy_generic: +direct_csum_partial_copy_generic: subl $4,%esp pushl %edi pushl %esi @@ -422,7 +422,7 @@ DST( movb %cl, (%edi) ) #define ARGBASE 12 -csum_partial_copy_generic: +direct_csum_partial_copy_generic: pushl %ebx pushl %edi pushl %esi --- linux-2.6.2-rc1/arch/i386/lib/dec_and_lock.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/dec_and_lock.c 2004-01-22 00:01:17.000000000 -0800 @@ -10,6 +10,7 @@ #include #include +#ifndef ATOMIC_DEC_AND_LOCK int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) { int counter; @@ -38,3 +39,5 @@ slow_path: spin_unlock(lock); return 0; } +#endif + --- linux-2.6.2-rc1/arch/i386/lib/getuser.S 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/getuser.S 2004-01-22 00:01:18.000000000 -0800 @@ -9,6 +9,7 @@ * return value. */ #include +#include /* @@ -28,7 +29,7 @@ .globl __get_user_1 __get_user_1: GET_THREAD_INFO(%edx) - cmpl TI_ADDR_LIMIT(%edx),%eax + cmpl TI_addr_limit(%edx),%eax jae bad_get_user 1: movzbl (%eax),%edx xorl %eax,%eax @@ -40,7 +41,7 @@ __get_user_2: addl $1,%eax jc bad_get_user GET_THREAD_INFO(%edx) - cmpl TI_ADDR_LIMIT(%edx),%eax + cmpl TI_addr_limit(%edx),%eax jae bad_get_user 2: movzwl -1(%eax),%edx xorl %eax,%eax @@ -52,7 +53,7 @@ __get_user_4: addl $3,%eax jc bad_get_user GET_THREAD_INFO(%edx) - cmpl TI_ADDR_LIMIT(%edx),%eax + cmpl TI_addr_limit(%edx),%eax jae bad_get_user 3: movl -3(%eax),%edx xorl %eax,%eax --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/i386/lib/kgdb_serial.c 2004-01-21 23:26:59.000000000 -0800 @@ -0,0 +1,499 @@ +/* + * Serial interface GDB stub + * + * Written (hacked together) by David Grothe (dave@gcom.com) + * Modified to allow invokation early in boot see also + * kgdb.h for instructions by George Anzinger(george@mvista.com) + * Modified to handle debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_KGDB_USER_CONSOLE +extern void kgdb_console_finit(void); +#endif +#define PRNT_off +#define TEST_EXISTANCE +#ifdef PRNT +#define dbprintk(s) printk s +#else +#define dbprintk(s) +#endif +#define TEST_INTERRUPT_off +#ifdef TEST_INTERRUPT +#define intprintk(s) printk s +#else +#define intprintk(s) +#endif + +#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) + +#define GDB_BUF_SIZE 512 /* power of 2, please */ + +static char gdb_buf[GDB_BUF_SIZE]; +static int gdb_buf_in_inx; +static atomic_t gdb_buf_in_cnt; +static int gdb_buf_out_inx; + +struct async_struct *gdb_async_info; +static int gdb_async_irq; + +#define outb_px(a,b) outb_p(b,a) + +static void program_uart(struct async_struct *info); +static void write_char(struct async_struct *info, int chr); +/* + * Get a byte from the hardware data buffer and return it + */ +static int +read_data_bfr(struct async_struct *info) +{ + char it = inb_p(info->port + UART_LSR); + + if (it & UART_LSR_DR) + return (inb_p(info->port + UART_RX)); + /* + * If we have a framing error assume somebody messed with + * our uart. Reprogram it and send '-' both ways... + */ + if (it & 0xc) { + program_uart(info); + write_char(info, '-'); + return ('-'); + } + return (-1); + +} /* read_data_bfr */ + +/* + * Get a char if available, return -1 if nothing available. + * Empty the receive buffer first, then look at the interface hardware. + + * Locking here is a bit of a problem. We MUST not lock out communication + * if we are trying to talk to gdb about a kgdb entry. ON the other hand + * we can loose chars in the console pass thru if we don't lock. It is also + * possible that we could hold the lock or be waiting for it when kgdb + * NEEDS to talk. Since kgdb locks down the world, it does not need locks. + * We do, of course have possible issues with interrupting a uart operation, + * but we will just depend on the uart status to help keep that straight. + + */ +static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; +#ifdef CONFIG_SMP +extern spinlock_t kgdb_spinlock; +#endif + +static int +read_char(struct async_struct *info) +{ + int chr; + unsigned long flags; + local_irq_save(flags); +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_lock(&uart_interrupt_lock); + } +#endif + if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ + chr = gdb_buf[gdb_buf_out_inx++]; + gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); + atomic_dec(&gdb_buf_in_cnt); + } else { + chr = read_data_bfr(info); + } +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_unlock(&uart_interrupt_lock); + } +#endif + local_irq_restore(flags); + return (chr); +} + +/* + * Wait until the interface can accept a char, then write it. + */ +static void +write_char(struct async_struct *info, int chr) +{ + while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; + + outb_p(chr, info->port + UART_TX); + +} /* write_char */ + +/* + * Mostly we don't need a spinlock, but since the console goes + * thru here with interrutps on, well, we need to catch those + * chars. + */ +/* + * This is the receiver interrupt routine for the GDB stub. + * It will receive a limited number of characters of input + * from the gdb host machine and save them up in a buffer. + * + * When the gdb stub routine tty_getDebugChar() is called it + * draws characters out of the buffer until it is empty and + * then reads directly from the serial port. + * + * We do not attempt to write chars from the interrupt routine + * since the stubs do all of that via tty_putDebugChar() which + * writes one byte after waiting for the interface to become + * ready. + * + * The debug stubs like to run with interrupts disabled since, + * after all, they run as a consequence of a breakpoint in + * the kernel. + * + * Perhaps someone who knows more about the tty driver than I + * care to learn can make this work for any low level serial + * driver. + */ +static irqreturn_t +gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct async_struct *info; + unsigned long flags; + + info = gdb_async_info; + if (!info || !info->tty || irq != gdb_async_irq) + return IRQ_NONE; + + local_irq_save(flags); + spin_lock(&uart_interrupt_lock); + do { + int chr = read_data_bfr(info); + intprintk(("Debug char on int: %x hex\n", chr)); + if (chr < 0) + continue; + + if (chr == 3) { /* Ctrl-C means remote interrupt */ + BREAKPOINT; + continue; + } + + if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { + /* buffer overflow tosses early char */ + read_char(info); + } + gdb_buf[gdb_buf_in_inx++] = chr; + gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); + } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); + spin_unlock(&uart_interrupt_lock); + local_irq_restore(flags); + return IRQ_HANDLED; +} /* gdb_interrupt */ + +/* + * Just a NULL routine for testing. + */ +void +gdb_null(void) +{ +} /* gdb_null */ + +/* These structure are filled in with values defined in asm/kgdb_local.h + */ +static struct serial_state state = SB_STATE; +static struct async_struct local_info = SB_INFO; +static int ok_to_enable_ints = 0; +static void kgdb_enable_ints_now(void); + +extern char *kgdb_version; +/* + * Hook an IRQ for KGDB. + * + * This routine is called from tty_putDebugChar, below. + */ +static int ints_disabled = 1; +int +gdb_hook_interrupt(struct async_struct *info, int verb) +{ + struct serial_state *state = info->state; + unsigned long flags; + int port; +#ifdef TEST_EXISTANCE + int scratch, scratch2; +#endif + + /* The above fails if memory managment is not set up yet. + * Rather than fail the set up, just keep track of the fact + * and pick up the interrupt thing later. + */ + gdb_async_info = info; + port = gdb_async_info->port; + gdb_async_irq = state->irq; + if (verb) { + printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", + kgdb_version, + port, + gdb_async_irq, gdb_async_info->state->custom_divisor); + } + local_irq_save(flags); +#ifdef TEST_EXISTANCE + /* Existance test */ + /* Should not need all this, but just in case.... */ + + scratch = inb_p(port + UART_IER); + outb_px(port + UART_IER, 0); + outb_px(0xff, 0x080); + scratch2 = inb_p(port + UART_IER); + outb_px(port + UART_IER, scratch); + if (scratch2) { + printk + ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); + local_irq_restore(flags); + return 1; /* We failed; there's nothing here */ + } + scratch2 = inb_p(port + UART_LCR); + outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ + outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ + outb_px(port + UART_LCR, 0); + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); + scratch = inb_p(port + UART_IIR) >> 6; + if (scratch == 1) { + printk("gdb_hook_interrupt: Undefined UART type!" + " Not a UART! \n"); + local_irq_restore(flags); + return 1; + } else { + dbprintk(("gdb_hook_interrupt: UART type " + "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); + } + scratch = inb_p(port + UART_MCR); + outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); + outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); + scratch2 = inb_p(port + UART_MSR) & 0xF0; + outb_px(port + UART_MCR, scratch); + if (scratch2 != 0x90) { + printk("gdb_hook_interrupt: " + "Loop back test failed! Not a UART!\n"); + local_irq_restore(flags); + return scratch2 + 1000; /* force 0 to fail */ + } +#endif /* test existance */ + program_uart(info); + local_irq_restore(flags); + + return (0); + +} /* gdb_hook_interrupt */ + +static void +program_uart(struct async_struct *info) +{ + int port = info->port; + + (void) inb_p(port + UART_RX); + outb_px(port + UART_IER, 0); + + (void) inb_p(port + UART_RX); /* serial driver comments say */ + (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ + (void) inb_p(port + UART_MSR); + outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); + outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ + outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ + outb_px(port + UART_MCR, info->MCR); + + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ + outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + return; +} + +/* + * tty_getDebugChar + * + * This is a GDB stub routine. It waits for a character from the + * serial interface and then returns it. If there is no serial + * interface connection then it returns a bogus value which will + * almost certainly cause the system to hang. In the + */ +int kgdb_in_isr = 0; +int kgdb_in_lsr = 0; +extern spinlock_t kgdb_spinlock; + +/* Caller takes needed protections */ + +int +tty_getDebugChar(void) +{ + volatile int chr, dum, time, end_time; + + dbprintk(("tty_getDebugChar(port %x): ", gdb_async_info->port)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + /* + * This trick says if we wait a very long time and get + * no char, return the -1 and let the upper level deal + * with it. + */ + rdtsc(dum, time); + end_time = time + 2; + while (((chr = read_char(gdb_async_info)) == -1) && + (end_time - time) > 0) { + rdtsc(dum, time); + }; + /* + * This covers our butts if some other code messes with + * our uart, hay, it happens :o) + */ + if (chr == -1) + program_uart(gdb_async_info); + + dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); + return (chr); + +} /* tty_getDebugChar */ + +static int count = 3; +static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; + +static int __init +kgdb_enable_ints(void) +{ + if (kgdboe) { + return 0; + } + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 1); + } + ok_to_enable_ints = 1; + kgdb_enable_ints_now(); +#ifdef CONFIG_KGDB_USER_CONSOLE + kgdb_console_finit(); +#endif + return 0; +} + +#ifdef CONFIG_SERIAL_8250 +void shutdown_for_kgdb(struct async_struct *gdb_async_info); +#endif + +#ifdef CONFIG_DISCONTIGMEM +static inline int kgdb_mem_init_done(void) +{ + return highmem_start_page != NULL; +} +#else +static inline int kgdb_mem_init_done(void) +{ + return max_mapnr != 0; +} +#endif + +static void +kgdb_enable_ints_now(void) +{ + if (!spin_trylock(&one_at_atime)) + return; + if (!ints_disabled) + goto exit; + if (kgdb_mem_init_done() && + ints_disabled) { /* don't try till mem init */ +#ifdef CONFIG_SERIAL_8250 + /* + * The ifdef here allows the system to be configured + * without the serial driver. + * Don't make it a module, however, it will steal the port + */ + shutdown_for_kgdb(gdb_async_info); +#endif + ints_disabled = request_irq(gdb_async_info->state->irq, + gdb_interrupt, + IRQ_T(gdb_async_info), + "KGDB-stub", NULL); + intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); + } + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + exit: + spin_unlock(&one_at_atime); +} + +/* + * tty_putDebugChar + * + * This is a GDB stub routine. It waits until the interface is ready + * to transmit a char and then sends it. If there is no serial + * interface connection then it simply returns to its caller, having + * pretended to send the char. Caller takes needed protections. + */ +void +tty_putDebugChar(int chr) +{ + dbprintk(("tty_putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", + gdb_async_info->port, + chr, + chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + + write_char(gdb_async_info, chr); /* this routine will wait */ + count = (chr == '#') ? 0 : count + 1; + if ((count == 2)) { /* try to enable after */ + if (ints_disabled & ok_to_enable_ints) + kgdb_enable_ints_now(); /* try to enable after */ + + /* We do this a lot because, well we really want to get these + * interrupts. The serial driver will clear these bits when it + * initializes the chip. Every thing else it does is ok, + * but this. + */ + if (!ints_disabled) { + outb_px(gdb_async_info->port + UART_IER, + gdb_async_info->IER); + } + } + +} /* tty_putDebugChar */ + +/* + * This does nothing for the serial port, since it doesn't buffer. + */ + +void tty_flushDebugChar(void) +{ +} + +module_init(kgdb_enable_ints); --- linux-2.6.2-rc1/arch/i386/lib/Makefile 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/Makefile 2004-01-21 23:26:58.000000000 -0800 @@ -9,4 +9,5 @@ lib-y = checksum.o delay.o \ lib-$(CONFIG_X86_USE_3DNOW) += mmx.o lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +lib-$(CONFIG_KGDB) += kgdb_serial.o lib-$(CONFIG_DEBUG_IOVIRT) += iodebug.o --- linux-2.6.2-rc1/arch/i386/lib/mmx.c 2003-06-14 12:18:21.000000000 -0700 +++ 25/arch/i386/lib/mmx.c 2004-01-21 23:30:03.000000000 -0800 @@ -121,7 +121,7 @@ void *_mmx_memcpy(void *to, const void * return p; } -#ifdef CONFIG_MK7 +#ifdef CONFIG_CPU_ONLY_K7 /* * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and --- linux-2.6.2-rc1/arch/i386/lib/usercopy.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/lib/usercopy.c 2004-01-22 00:01:18.000000000 -0800 @@ -76,7 +76,7 @@ do { \ * and returns @count. */ long -__strncpy_from_user(char *dst, const char __user *src, long count) +__direct_strncpy_from_user(char *dst, const char __user *src, long count) { long res; __do_strncpy_from_user(dst, src, count, res); @@ -102,7 +102,7 @@ __strncpy_from_user(char *dst, const cha * and returns @count. */ long -strncpy_from_user(char *dst, const char __user *src, long count) +direct_strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; if (access_ok(VERIFY_READ, src, 1)) @@ -147,7 +147,7 @@ do { \ * On success, this will be zero. */ unsigned long -clear_user(void __user *to, unsigned long n) +direct_clear_user(void __user *to, unsigned long n) { might_sleep(); if (access_ok(VERIFY_WRITE, to, n)) @@ -167,7 +167,7 @@ clear_user(void __user *to, unsigned lon * On success, this will be zero. */ unsigned long -__clear_user(void __user *to, unsigned long n) +__direct_clear_user(void __user *to, unsigned long n) { __do_clear_user(to, n); return n; @@ -184,7 +184,7 @@ __clear_user(void __user *to, unsigned l * On exception, returns 0. * If the string is too long, returns a value greater than @n. */ -long strnlen_user(const char __user *s, long n) +long direct_strnlen_user(const char __user *s, long n) { unsigned long mask = -__addr_ok(s); unsigned long res, tmp; @@ -575,3 +575,4 @@ unsigned long __copy_from_user_ll(void * n = __copy_user_zeroing_intel(to, (const void *) from, n); return n; } + --- linux-2.6.2-rc1/arch/i386/mach-es7000/es7000.c 2003-06-16 22:32:20.000000000 -0700 +++ 25/arch/i386/mach-es7000/es7000.c 2004-01-21 23:27:03.000000000 -0800 @@ -51,8 +51,6 @@ struct mip_reg *host_reg; int mip_port; unsigned long mip_addr, host_addr; -static int es7000_plat; - /* * Parse the OEM Table */ --- linux-2.6.2-rc1/arch/i386/mach-voyager/voyager_smp.c 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/i386/mach-voyager/voyager_smp.c 2004-01-21 23:30:03.000000000 -0800 @@ -553,7 +553,7 @@ do_boot_cpu(__u8 cpu) /* For the 486, we can't use the 4Mb page table trick, so * must map a region of memory */ -#ifdef CONFIG_M486 +#ifdef CONFIG_CPU_486 int i; unsigned long *page_table_copies = (unsigned long *) __get_free_page(GFP_KERNEL); @@ -612,7 +612,7 @@ do_boot_cpu(__u8 cpu) /* set the original swapper_pg_dir[0] to map 0 to 4Mb transparently * (so that the booting CPU can find start_32 */ orig_swapper_pg_dir0 = swapper_pg_dir[0]; -#ifdef CONFIG_M486 +#ifdef CONFIG_CPU_486 if(page_table_copies == NULL) panic("No free memory for 486 page tables\n"); for(i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++) @@ -669,7 +669,7 @@ do_boot_cpu(__u8 cpu) /* reset the page table */ swapper_pg_dir[0] = orig_swapper_pg_dir0; local_flush_tlb(); -#ifdef CONFIG_M486 +#ifdef CONFIG_CPU_486 free_page((unsigned long)page_table_copies); #endif --- linux-2.6.2-rc1/arch/i386/Makefile 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/Makefile 2004-01-21 23:30:33.000000000 -0800 @@ -19,33 +19,59 @@ LDFLAGS := -m elf_i386 OBJCOPYFLAGS := -O binary -R .note -R .comment -S LDFLAGS_vmlinux := -CFLAGS += -pipe +CFLAGS += -pipe -msoft-float # prevent gcc from keeping the stack 16 byte aligned CFLAGS += $(call check_gcc,-mpreferred-stack-boundary=2,) align := $(subst -functions=0,,$(call check_gcc,-falign-functions=0,-malign-functions=0)) -cflags-$(CONFIG_M386) += -march=i386 -cflags-$(CONFIG_M486) += -march=i486 -cflags-$(CONFIG_M586) += -march=i586 -cflags-$(CONFIG_M586TSC) += -march=i586 -cflags-$(CONFIG_M586MMX) += $(call check_gcc,-march=pentium-mmx,-march=i586) -cflags-$(CONFIG_M686) += -march=i686 -cflags-$(CONFIG_MPENTIUMII) += $(call check_gcc,-march=pentium2,-march=i686) -cflags-$(CONFIG_MPENTIUMIII) += $(call check_gcc,-march=pentium3,-march=i686) -cflags-$(CONFIG_MPENTIUM4) += $(call check_gcc,-march=pentium4,-march=i686) -cflags-$(CONFIG_MK6) += $(call check_gcc,-march=k6,-march=i586) +cflags-$(CONFIG_CPU_PENTIUM4) := $(call check_gcc,-march=pentium4,-march=i686) + +ifdef CONFIG_CPU_PENTIUM4 + cflags-$(CONFIG_CPU_K8) := $(call check_gcc,-march=pentium3,-march=i686) +else + cflags-$(CONFIG_CPU_K8) := $(call check_gcc,-march=k8,$(call check_gcc,-march=athlon,-march=i686 $(align)-functions=4)) +endif + # Please note, that patches that add -march=athlon-xp and friends are pointless. # They make zero difference whatsosever to performance at this time. -cflags-$(CONFIG_MK7) += $(call check_gcc,-march=athlon,-march=i686 $(align)-functions=4) -cflags-$(CONFIG_MK8) += $(call check_gcc,-march=k8,$(call check_gcc,-march=athlon,-march=i686 $(align)-functions=4)) -cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 -cflags-$(CONFIG_MWINCHIPC6) += $(call check_gcc,-march=winchip-c6,-march=i586) -cflags-$(CONFIG_MWINCHIP2) += $(call check_gcc,-march=winchip2,-march=i586) -cflags-$(CONFIG_MWINCHIP3D) += $(call check_gcc,-march=winchip2,-march=i586) -cflags-$(CONFIG_MCYRIXIII) += $(call check_gcc,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 -cflags-$(CONFIG_MVIAC3_2) += $(call check_gcc,-march=c3-2,-march=i686) +ifdef CONFIG_CPU_PENTIUM4 + cflags-$(CONFIG_CPU_K7) := $(call check_gcc,-march=pentium3,-march=i686) +else + cflags-$(CONFIG_CPU_K7) := $(call check_gcc,-march=athlon,-march=i686 $(align)-functions=4) +endif + +cflags-$(CONFIG_CPU_PENTIUMM) := $(call check_gcc,-march=pentium3,-march=i686) +cflags-$(CONFIG_CPU_PENTIUMIII) := $(call check_gcc,-march=pentium3,-march=i686) +cflags-$(CONFIG_CPU_PENTIUMII) := $(call check_gcc,-march=pentium2,-march=i686) +cflags-$(CONFIG_CPU_VIAC3_2) := $(call check_gcc,-march=c3-2,-march=i686) +cflags-$(CONFIG_CPU_CRUSOE) := -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 +cflags-$(CONFIG_CPU_686) := -march=i686 + +# supports i686 without cmov +cflags-$(CONFIG_CPU_CYRIXIII) := $(call check_gcc,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 + +cflags-$(CONFIG_CPU_K6) := -march=k6 +cflags-$(CONFIG_CPU_586MMX) := $(call check_gcc,-march=pentium-mmx,-march=i586) + +# Winchip supports i586 +cflags-$(CONFIG_CPU_WINCHIPC6) := $(call check_gcc,-march=winchip-c6,-march=i486) +cflags-$(CONFIG_CPU_WINCHIP2) := $(call check_gcc,-march=winchip2,-march=i486) +cflags-$(CONFIG_CPU_WINCHIP3D) := $(call check_gcc,-march=winchip2,-march=i486) + +cflags-$(CONFIG_CPU_586TSC) := -march=i586 +cflags-$(CONFIG_CPU_586) := -march=i586 +cflags-$(CONFIG_CPU_486) := -march=i486 +cflags-$(CONFIG_CPU_386) := -march=i386 + +# AMD Elan support +cflags-$(CONFIG_X86_ELAN) := -march=i486 + +# -mregparm=3 works ok on gcc-3.0 and later +# +GCC_VERSION := $(shell $(CONFIG_SHELL) scripts/gcc-version.sh $(CC)) +cflags-$(CONFIG_REGPARM) += $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;) CFLAGS += $(cflags-y) @@ -84,6 +110,9 @@ mcore-$(CONFIG_X86_ES7000) := mach-es700 # default subarch .h files mflags-y += -Iinclude/asm-i386/mach-default +mflags-$(CONFIG_KGDB) += -gdwarf-2 +mflags-$(CONFIG_KGDB_MORE) += $(shell echo $(CONFIG_KGDB_OPTIONS) | sed -e 's/"//g') + head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o libs-y += arch/i386/lib/ --- linux-2.6.2-rc1/arch/i386/math-emu/fpu_system.h 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/math-emu/fpu_system.h 2004-01-22 00:01:18.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include /* This sets the pointer FPU_info to point to the argument part of the stack frame of math_emulate() */ @@ -22,7 +23,7 @@ /* s is always from a cpu register, and the cpu does bounds checking * during register load --> no further bounds checks needed */ -#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) +#define LDT_DESCRIPTOR(s) (((struct desc_struct *)__kmap_atomic_vaddr(KM_LDT_PAGE0))[(s) >> 3]) #define SEG_D_SIZE(x) ((x).b & (3 << 21)) #define SEG_G_BIT(x) ((x).b & (1 << 23)) #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) --- linux-2.6.2-rc1/arch/i386/mm/fault.c 2003-12-17 21:20:01.000000000 -0800 +++ 25/arch/i386/mm/fault.c 2004-01-22 00:01:18.000000000 -0800 @@ -27,6 +27,7 @@ #include #include #include +#include extern void die(const char *,struct pt_regs *,long); @@ -104,8 +105,17 @@ static inline unsigned long get_segment_ if (seg & (1<<2)) { /* Must lock the LDT while reading it. */ down(¤t->mm->context.sem); +#if 1 + /* horrible hack for 4/4 disabled kernels. + I'm not quite sure what the TLB flush is good for, + it's mindlessly copied from the read_ldt code */ + __flush_tlb_global(); + desc = kmap(current->mm->context.ldt_pages[(seg&~7)/PAGE_SIZE]); + desc = (void *)desc + ((seg & ~7) % PAGE_SIZE); +#else desc = current->mm->context.ldt; desc = (void *)desc + (seg & ~7); +#endif } else { /* Must disable preemption while reading the GDT. */ desc = (u32 *)&cpu_gdt_table[get_cpu()]; @@ -118,6 +128,9 @@ static inline unsigned long get_segment_ (desc[1] & 0xff000000); if (seg & (1<<2)) { +#if 1 + kunmap((void *)((unsigned long)desc & PAGE_MASK)); +#endif up(¤t->mm->context.sem); } else put_cpu(); @@ -243,6 +256,19 @@ asmlinkage void do_page_fault(struct pt_ * (error_code & 4) == 0, and that the fault was not a * protection error (error_code & 1) == 0. */ +#ifdef CONFIG_X86_4G + /* + * On 4/4 all kernels faults are either bugs, vmalloc or prefetch + */ + if (unlikely((regs->xcs & 3) == 0)) { + if (error_code & 3) + goto bad_area_nosemaphore; + + /* If it's vm86 fall through */ + if (!(regs->eflags & VM_MASK)) + goto vmalloc_fault; + } +#else if (unlikely(address >= TASK_SIZE)) { if (!(error_code & 5)) goto vmalloc_fault; @@ -252,6 +278,7 @@ asmlinkage void do_page_fault(struct pt_ */ goto bad_area_nosemaphore; } +#endif mm = tsk->mm; @@ -403,6 +430,12 @@ no_context: * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ +#ifdef CONFIG_KGDB + if (!user_mode(regs)){ + kgdb_handle_exception(14,SIGBUS, error_code, regs); + return; + } +#endif bust_spinlocks(1); --- linux-2.6.2-rc1/arch/i386/mm/init.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/mm/init.c 2004-01-22 00:01:18.000000000 -0800 @@ -40,125 +40,13 @@ #include #include #include +#include DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; static int do_test_wp_bit(void); -/* - * Creates a middle page table and puts a pointer to it in the - * given global directory entry. This only returns the gd entry - * in non-PAE compilation mode, since the middle layer is folded. - */ -static pmd_t * __init one_md_table_init(pgd_t *pgd) -{ - pmd_t *pmd_table; - -#ifdef CONFIG_X86_PAE - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - if (pmd_table != pmd_offset(pgd, 0)) - BUG(); -#else - pmd_table = pmd_offset(pgd, 0); -#endif - - return pmd_table; -} - -/* - * Create a page table and place a pointer to it in a middle page - * directory entry. - */ -static pte_t * __init one_page_table_init(pmd_t *pmd) -{ - if (pmd_none(*pmd)) { - pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); - if (page_table != pte_offset_kernel(pmd, 0)) - BUG(); - - return page_table; - } - - return pte_offset_kernel(pmd, 0); -} - -/* - * This function initializes a certain range of kernel virtual memory - * with new bootmem page tables, everywhere page tables are missing in - * the given range. - */ - -/* - * NOTE: The pagetables are allocated contiguous on the physical space - * so we can cache the place of the first one and move around without - * checking the pgd every time. - */ -static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) -{ - pgd_t *pgd; - pmd_t *pmd; - int pgd_idx, pmd_idx; - unsigned long vaddr; - - vaddr = start; - pgd_idx = pgd_index(vaddr); - pmd_idx = pmd_index(vaddr); - pgd = pgd_base + pgd_idx; - - for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - if (pgd_none(*pgd)) - one_md_table_init(pgd); - - pmd = pmd_offset(pgd, vaddr); - for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { - if (pmd_none(*pmd)) - one_page_table_init(pmd); - - vaddr += PMD_SIZE; - } - pmd_idx = 0; - } -} - -/* - * This maps the physical memory to kernel virtual address space, a total - * of max_low_pfn pages, by creating page tables starting from address - * PAGE_OFFSET. - */ -static void __init kernel_physical_mapping_init(pgd_t *pgd_base) -{ - unsigned long pfn; - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int pgd_idx, pmd_idx, pte_ofs; - - pgd_idx = pgd_index(PAGE_OFFSET); - pgd = pgd_base + pgd_idx; - pfn = 0; - - for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { - pmd = one_md_table_init(pgd); - if (pfn >= max_low_pfn) - continue; - for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { - /* Map with big pages if possible, otherwise create normal page tables. */ - if (cpu_has_pse) { - set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); - pfn += PTRS_PER_PTE; - } else { - pte = one_page_table_init(pmd); - - for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); - } - } - } -} - static inline int page_kills_ppro(unsigned long pagenr) { if (pagenr >= 0x70000 && pagenr <= 0x7003F) @@ -206,11 +94,8 @@ static inline int page_is_ram(unsigned l return 0; } -#ifdef CONFIG_HIGHMEM pte_t *kmap_pte; -pgprot_t kmap_prot; -EXPORT_SYMBOL(kmap_prot); EXPORT_SYMBOL(kmap_pte); #define kmap_get_fixmap_pte(vaddr) \ @@ -218,29 +103,7 @@ EXPORT_SYMBOL(kmap_pte); void __init kmap_init(void) { - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; -} - -void __init permanent_kmaps_init(pgd_t *pgd_base) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - unsigned long vaddr; - - vaddr = PKMAP_BASE; - page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - - pgd = swapper_pg_dir + pgd_index(vaddr); - pmd = pmd_offset(pgd, vaddr); - pte = pte_offset_kernel(pmd, vaddr); - pkmap_page_table = pte; + kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN)); } void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) @@ -255,6 +118,8 @@ void __init one_highpage_init(struct pag SetPageReserved(page); } +#ifdef CONFIG_HIGHMEM + #ifndef CONFIG_DISCONTIGMEM void __init set_highmem_pages_init(int bad_ppro) { @@ -266,12 +131,9 @@ void __init set_highmem_pages_init(int b #else extern void set_highmem_pages_init(int); #endif /* !CONFIG_DISCONTIGMEM */ - #else -#define kmap_init() do { } while (0) -#define permanent_kmaps_init(pgd_base) do { } while (0) -#define set_highmem_pages_init(bad_ppro) do { } while (0) -#endif /* CONFIG_HIGHMEM */ +# define set_highmem_pages_init(bad_ppro) do { } while (0) +#endif unsigned long __PAGE_KERNEL = _PAGE_KERNEL; @@ -281,30 +143,125 @@ unsigned long __PAGE_KERNEL = _PAGE_KERN extern void __init remap_numa_kva(void); #endif -static void __init pagetable_init (void) +static __init void prepare_pagetables(pgd_t *pgd_base, unsigned long address) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_base + pgd_index(address); + pmd = pmd_offset(pgd, address); + if (!pmd_present(*pmd)) { + pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); + } +} + +static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base) +{ + unsigned long vaddr; + + for (vaddr = start; vaddr != end; vaddr += PAGE_SIZE) + prepare_pagetables(pgd_base, vaddr); +} + +void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end) { unsigned long vaddr; - pgd_t *pgd_base = swapper_pg_dir; + pgd_t *pgd; + int i, j, k; + pmd_t *pmd; + pte_t *pte, *pte_base; + + pgd = pgd_base; + + for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { + vaddr = i*PGDIR_SIZE; + if (end && (vaddr >= end)) + break; + pmd = pmd_offset(pgd, 0); + for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { + vaddr = i*PGDIR_SIZE + j*PMD_SIZE; + if (end && (vaddr >= end)) + break; + if (vaddr < start) + continue; + if (cpu_has_pse) { + unsigned long __pe; + set_in_cr4(X86_CR4_PSE); + boot_cpu_data.wp_works_ok = 1; + __pe = _KERNPG_TABLE + _PAGE_PSE + vaddr - start; + /* Make it "global" too if supported */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); +#if !defined(CONFIG_X86_SWITCH_PAGETABLES) + __pe += _PAGE_GLOBAL; + __PAGE_KERNEL |= _PAGE_GLOBAL; +#endif + } + set_pmd(pmd, __pmd(__pe)); + continue; + } + if (!pmd_present(*pmd)) + pte_base = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + else + pte_base = (pte_t *) page_address(pmd_page(*pmd)); + pte = pte_base; + for (k = 0; k < PTRS_PER_PTE; pte++, k++) { + vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; + if (end && (vaddr >= end)) + break; + if (vaddr < start) + continue; + *pte = mk_pte_phys(vaddr-start, PAGE_KERNEL); + } + set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); + } + } +} + +static void __init pagetable_init (void) +{ + unsigned long vaddr, end; + pgd_t *pgd_base; #ifdef CONFIG_X86_PAE int i; - /* Init entries of the first-level page table to the zero page */ - for (i = 0; i < PTRS_PER_PGD; i++) - set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); #endif - /* Enable PSE if available */ - if (cpu_has_pse) { - set_in_cr4(X86_CR4_PSE); - } + /* + * This can be zero as well - no problem, in that case we exit + * the loops anyway due to the PTRS_PER_* conditions. + */ + end = (unsigned long)__va(max_low_pfn*PAGE_SIZE); - /* Enable PGE if available */ - if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); - __PAGE_KERNEL |= _PAGE_GLOBAL; + pgd_base = swapper_pg_dir; +#ifdef CONFIG_X86_PAE + /* + * It causes too many problems if there's no proper pmd set up + * for all 4 entries of the PGD - so we allocate all of them. + * PAE systems will not miss this extra 4-8K anyway ... + */ + for (i = 0; i < PTRS_PER_PGD; i++) { + pmd_t *pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pgd(pgd_base + i, __pgd(__pa(pmd) + 0x1)); } +#endif + /* + * Set up lowmem-sized identity mappings at PAGE_OFFSET: + */ + setup_identity_mappings(pgd_base, PAGE_OFFSET, end); - kernel_physical_mapping_init(pgd_base); + /* + * Add flat-mode identity-mappings - SMP needs it when + * starting up on an AP from real-mode. (In the non-PAE + * case we already have these mappings through head.S.) + * All user-space mappings are explicitly cleared after + * SMP startup. + */ +#if CONFIG_SMP && CONFIG_X86_PAE + setup_identity_mappings(pgd_base, 0, 16*1024*1024); +#endif remap_numa_kva(); /* @@ -312,38 +269,64 @@ static void __init pagetable_init (void) * created - mappings will be set by set_fixmap(): */ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - page_table_range_init(vaddr, 0, pgd_base); + fixrange_init(vaddr, 0, pgd_base); - permanent_kmaps_init(pgd_base); +#if CONFIG_HIGHMEM + { + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; -#ifdef CONFIG_X86_PAE - /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. - */ - pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; + /* + * Permanent kmaps: + */ + vaddr = PKMAP_BASE; + fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + + pgd = swapper_pg_dir + pgd_index(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + pkmap_page_table = pte; + } #endif } -void zap_low_mappings (void) +/* + * Clear kernel pagetables in a PMD_SIZE-aligned range. + */ +static void clear_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end) { - int i; + unsigned long vaddr; + pgd_t *pgd; + pmd_t *pmd; + int i, j; + + pgd = pgd_base; + + for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { + vaddr = i*PGDIR_SIZE; + if (end && (vaddr >= end)) + break; + pmd = pmd_offset(pgd, 0); + for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { + vaddr = i*PGDIR_SIZE + j*PMD_SIZE; + if (end && (vaddr >= end)) + break; + if (vaddr < start) + continue; + pmd_clear(pmd); + } + } + flush_tlb_all(); +} + +void __init zap_low_mappings(void) +{ + printk("zapping low mappings.\n"); /* * Zap initial low-memory mappings. - * - * Note that "pgd_clear()" doesn't do it for - * us, because pgd_clear() is a no-op on i386. */ - for (i = 0; i < USER_PTRS_PER_PGD; i++) -#ifdef CONFIG_X86_PAE - set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); -#else - set_pgd(swapper_pg_dir+i, __pgd(0)); -#endif - flush_tlb_all(); + clear_mappings(swapper_pg_dir, 0, 16*1024*1024); } #ifndef CONFIG_DISCONTIGMEM @@ -393,7 +376,15 @@ void __init paging_init(void) set_in_cr4(X86_CR4_PAE); #endif __flush_tlb_all(); - + /* + * Subtle. SMP is doing it's boot stuff late (because it has to + * fork idle threads) - but it also needs low mappings for the + * protected-mode entry to work. We zap these entries only after + * the WP-bit has been tested. + */ +#ifndef CONFIG_SMP + zap_low_mappings(); +#endif kmap_init(); zone_sizes_init(); } @@ -515,22 +506,18 @@ void __init mem_init(void) if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); - /* - * Subtle. SMP is doing it's boot stuff late (because it has to - * fork idle threads) - but it also needs low mappings for the - * protected-mode entry to work. We zap these entries only after - * the WP-bit has been tested. - */ -#ifndef CONFIG_SMP - zap_low_mappings(); -#endif + entry_trampoline_setup(); + default_ldt_page = virt_to_page(default_ldt); + load_LDT(&init_mm.context); } -kmem_cache_t *pgd_cache; -kmem_cache_t *pmd_cache; +kmem_cache_t *pgd_cache, *pmd_cache, *kpmd_cache; void __init pgtable_cache_init(void) { + void (*ctor)(void *, kmem_cache_t *, unsigned long); + void (*dtor)(void *, kmem_cache_t *, unsigned long); + if (PTRS_PER_PMD > 1) { pmd_cache = kmem_cache_create("pmd", PTRS_PER_PMD*sizeof(pmd_t), @@ -540,13 +527,36 @@ void __init pgtable_cache_init(void) NULL); if (!pmd_cache) panic("pgtable_cache_init(): cannot create pmd cache"); + + if (TASK_SIZE > PAGE_OFFSET) { + kpmd_cache = kmem_cache_create("kpmd", + PTRS_PER_PMD*sizeof(pmd_t), + 0, + SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, + kpmd_ctor, + NULL); + if (!kpmd_cache) + panic("pgtable_cache_init(): " + "cannot create kpmd cache"); + } } + + if (PTRS_PER_PMD == 1 || TASK_SIZE <= PAGE_OFFSET) + ctor = pgd_ctor; + else + ctor = NULL; + + if (PTRS_PER_PMD == 1 && TASK_SIZE <= PAGE_OFFSET) + dtor = pgd_dtor; + else + dtor = NULL; + pgd_cache = kmem_cache_create("pgd", PTRS_PER_PGD*sizeof(pgd_t), 0, SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, - pgd_ctor, - PTRS_PER_PMD == 1 ? pgd_dtor : NULL); + ctor, + dtor); if (!pgd_cache) panic("pgtable_cache_init(): Cannot create pgd cache"); } --- linux-2.6.2-rc1/arch/i386/mm/pgtable.c 2003-11-09 16:45:05.000000000 -0800 +++ 25/arch/i386/mm/pgtable.c 2004-01-22 00:01:18.000000000 -0800 @@ -21,6 +21,7 @@ #include #include #include +#include void show_mem(void) { @@ -157,11 +158,20 @@ void pmd_ctor(void *pmd, kmem_cache_t *c memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); } +void kpmd_ctor(void *__pmd, kmem_cache_t *cache, unsigned long flags) +{ + pmd_t *kpmd, *pmd; + kpmd = pmd_offset(&swapper_pg_dir[PTRS_PER_PGD-1], + (PTRS_PER_PMD - NR_SHARED_PMDS)*PMD_SIZE); + pmd = (pmd_t *)__pmd + (PTRS_PER_PMD - NR_SHARED_PMDS); + + memset(__pmd, 0, (PTRS_PER_PMD - NR_SHARED_PMDS)*sizeof(pmd_t)); + memcpy(pmd, kpmd, NR_SHARED_PMDS*sizeof(pmd_t)); +} + /* - * List of all pgd's needed for non-PAE so it can invalidate entries - * in both cached and uncached pgd's; not needed for PAE since the - * kernel pmd is shared. If PAE were not to share the pmd a similar - * tactic would be needed. This is essentially codepath-based locking + * List of all pgd's needed so it can invalidate entries in both cached + * and uncached pgd's. This is essentially codepath-based locking * against pageattr.c; it is the unique case in which a valid change * of kernel pagetables can't be lazily synchronized by vmalloc faults. * vmalloc faults work because attached pagetables are never freed. @@ -170,30 +180,60 @@ void pmd_ctor(void *pmd, kmem_cache_t *c * could be used. The locking scheme was chosen on the basis of * manfred's recommendations and having no core impact whatsoever. * -- wli + * + * The entire issue goes away when XKVA is configured. */ spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED; LIST_HEAD(pgd_list); -void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) +/* + * This is not that hard to figure out. + * (a) PTRS_PER_PMD == 1 means non-PAE. + * (b) PTRS_PER_PMD > 1 means PAE. + * (c) TASK_SIZE > PAGE_OFFSET means XKVA. + * (d) TASK_SIZE <= PAGE_OFFSET means non-XKVA. + * + * Do *NOT* back out the preconstruction like the patch I'm cleaning + * up after this very instant did, or at all, for that matter. + * This is never called when PTRS_PER_PMD > 1 && TASK_SIZE > PAGE_OFFSET. + * -- wli + */ +void pgd_ctor(void *__pgd, kmem_cache_t *cache, unsigned long unused) { + pgd_t *pgd = (pgd_t *)__pgd; unsigned long flags; - if (PTRS_PER_PMD == 1) - spin_lock_irqsave(&pgd_lock, flags); + if (PTRS_PER_PMD == 1) { + if (TASK_SIZE <= PAGE_OFFSET) + spin_lock_irqsave(&pgd_lock, flags); + else + memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS], + &swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS], + NR_SHARED_PMDS * sizeof(pgd_t)); + } - memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + if (TASK_SIZE <= PAGE_OFFSET) + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); if (PTRS_PER_PMD > 1) return; - list_add(&virt_to_page(pgd)->lru, &pgd_list); - spin_unlock_irqrestore(&pgd_lock, flags); - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); + if (TASK_SIZE > PAGE_OFFSET) + memset(pgd, 0, (PTRS_PER_PGD - NR_SHARED_PMDS)*sizeof(pgd_t)); + else { + list_add(&virt_to_page(pgd)->lru, &pgd_list); + spin_unlock_irqrestore(&pgd_lock, flags); + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); + } } -/* never called when PTRS_PER_PMD > 1 */ +/* + * Never called when PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET + * for with PAE we would list_del() multiple times, and for non-PAE + * with XKVA all the AGP pgd shootdown code is unnecessary. + */ void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) { unsigned long flags; /* can be called from interrupt context */ @@ -203,6 +243,12 @@ void pgd_dtor(void *pgd, kmem_cache_t *c spin_unlock_irqrestore(&pgd_lock, flags); } +/* + * See the comments above pgd_ctor() wrt. preconstruction. + * Do *NOT* memcpy() here. If you do, you back out important + * anti- cache pollution code. + * + */ pgd_t *pgd_alloc(struct mm_struct *mm) { int i; @@ -211,15 +257,33 @@ pgd_t *pgd_alloc(struct mm_struct *mm) if (PTRS_PER_PMD == 1 || !pgd) return pgd; + /* + * In the 4G userspace case alias the top 16 MB virtual + * memory range into the user mappings as well (these + * include the trampoline and CPU data structures). + */ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { - pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); + kmem_cache_t *cache; + pmd_t *pmd; + + if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1) + cache = kpmd_cache; + else + cache = pmd_cache; + + pmd = kmem_cache_alloc(cache, GFP_KERNEL); if (!pmd) goto out_oom; set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd)))); } - return pgd; + return pgd; out_oom: + /* + * we don't have to handle the kpmd_cache here, since it's the + * last allocation, and has either nothing to free or when it + * succeeds the whole operation succeeds. + */ for (i--; i >= 0; i--) kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); kmem_cache_free(pgd_cache, pgd); @@ -230,10 +294,29 @@ void pgd_free(pgd_t *pgd) { int i; - /* in the PAE case user pgd entries are overwritten before usage */ - if (PTRS_PER_PMD > 1) - for (i = 0; i < USER_PTRS_PER_PGD; ++i) - kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); /* in the non-PAE case, clear_page_tables() clears user pgd entries */ + if (PTRS_PER_PMD == 1) + goto out_free; + + /* in the PAE case user pgd entries are overwritten before usage */ + for (i = 0; i < USER_PTRS_PER_PGD; ++i) { + kmem_cache_t *cache; + pmd_t *pmd = __va(pgd_val(pgd[i]) - 1); + + /* + * only userspace pmd's are cleared for us + * by mm/memory.c; it's a slab cache invariant + * that we must separate the kernel pmd slab + * all times, else we'll have bad pmd's. + */ + if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1) + cache = kpmd_cache; + else + cache = pmd_cache; + + kmem_cache_free(cache, pmd); + } +out_free: kmem_cache_free(pgd_cache, pgd); } + --- linux-2.6.2-rc1/arch/i386/oprofile/op_model_p4.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/i386/oprofile/op_model_p4.c 2004-01-21 23:27:15.000000000 -0800 @@ -382,11 +382,8 @@ static struct p4_event_binding p4_events static unsigned int get_stagger(void) { #ifdef CONFIG_SMP - int cpu; - if (smp_num_siblings > 1) { - cpu = smp_processor_id(); - return (cpu_sibling_map[cpu] > cpu) ? 0 : 1; - } + int cpu = smp_processor_id(); + return (cpu != first_cpu(cpu_sibling_map[cpu])); #endif return 0; } --- linux-2.6.2-rc1/arch/i386/pci/acpi.c 2003-11-09 16:45:05.000000000 -0800 +++ 25/arch/i386/pci/acpi.c 2004-01-21 23:27:03.000000000 -0800 @@ -18,7 +18,7 @@ static int __init pci_acpi_init(void) if (pcibios_scanned) return 0; - if (!(pci_probe & PCI_NO_ACPI_ROUTING)) { + if (!acpi_noirq) { if (!acpi_pci_irq_init()) { printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); printk(KERN_INFO "PCI: if you experience problems, try using option 'pci=noacpi' or even 'acpi=off'\n"); @@ -31,15 +31,4 @@ static int __init pci_acpi_init(void) return 0; } - -/* - * pci_disable_acpi() - * act like pci=noacpi seen on command line - * called by DMI blacklist code - */ -__init void pci_disable_acpi(void) -{ - pci_probe |= PCI_NO_ACPI_ROUTING; -} - subsys_initcall(pci_acpi_init); --- linux-2.6.2-rc1/arch/i386/pci/common.c 2003-09-08 13:58:55.000000000 -0700 +++ 25/arch/i386/pci/common.c 2004-01-21 23:27:03.000000000 -0800 @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -197,12 +198,10 @@ char * __devinit pcibios_setup(char *st return NULL; } #endif -#ifdef CONFIG_ACPI_PCI else if (!strcmp(str, "noacpi")) { - pci_probe |= PCI_NO_ACPI_ROUTING; + acpi_noirq_set(); return NULL; } -#endif #ifndef CONFIG_X86_VISWS else if (!strcmp(str, "usepirqmask")) { pci_probe |= PCI_USE_PIRQ_MASK; --- linux-2.6.2-rc1/arch/i386/pci/pci.h 2003-06-22 12:04:43.000000000 -0700 +++ 25/arch/i386/pci/pci.h 2004-01-21 23:27:03.000000000 -0800 @@ -22,7 +22,6 @@ #define PCI_ASSIGN_ROMS 0x1000 #define PCI_BIOS_IRQ_SCAN 0x2000 #define PCI_ASSIGN_ALL_BUSSES 0x4000 -#define PCI_NO_ACPI_ROUTING 0x8000 extern unsigned int pci_probe; --- linux-2.6.2-rc1/arch/ia64/hp/sim/simeth.c 2003-07-27 12:14:38.000000000 -0700 +++ 25/arch/ia64/hp/sim/simeth.c 2004-01-21 23:26:52.000000000 -0800 @@ -224,7 +224,7 @@ simeth_probe1(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.2-rc1/arch/ia64/Kconfig 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/ia64/Kconfig 2004-01-22 00:01:17.000000000 -0800 @@ -510,6 +510,8 @@ source "drivers/input/Kconfig" source "drivers/char/Kconfig" +source "drivers/i2c/Kconfig" + #source drivers/misc/Config.in source "drivers/media/Kconfig" @@ -662,6 +664,13 @@ config DEBUG_INFO Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. +config LOCKMETER + bool "Kernel lock metering" + depends on SMP + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + endmenu source "security/Kconfig" --- linux-2.6.2-rc1/arch/ia64/kernel/unwind.c 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/ia64/kernel/unwind.c 2004-01-21 23:30:45.000000000 -0800 @@ -650,7 +650,7 @@ free_state_stack (struct unw_reg_state * /* Unwind decoder routines */ -static enum unw_register_index __attribute__((const)) +static enum unw_register_index __attribute_const__ decode_abreg (unsigned char abreg, int memory) { switch (abreg) { --- linux-2.6.2-rc1/arch/ia64/sn/kernel/sn2/io.c 2003-10-17 15:58:03.000000000 -0700 +++ 25/arch/ia64/sn/kernel/sn2/io.c 2004-01-21 23:30:44.000000000 -0800 @@ -23,6 +23,10 @@ #undef __sn_readw #undef __sn_readl #undef __sn_readq +#undef __sn_readb_relaxed +#undef __sn_readw_relaxed +#undef __sn_readl_relaxed +#undef __sn_readq_relaxed unsigned int __sn_inb (unsigned long port) @@ -84,4 +88,28 @@ __sn_readq (void *addr) return ___sn_readq (addr); } +unsigned char +__sn_readb_relaxed (void *addr) +{ + return ___sn_readb_relaxed (addr); +} + +unsigned short +__sn_readw_relaxed (void *addr) +{ + return ___sn_readw_relaxed (addr); +} + +unsigned int +__sn_readl_relaxed (void *addr) +{ + return ___sn_readl_relaxed (addr); +} + +unsigned long +__sn_readq_relaxed (void *addr) +{ + return ___sn_readq_relaxed (addr); +} + #endif --- linux-2.6.2-rc1/arch/m68k/Kconfig 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/m68k/Kconfig 2004-01-21 23:30:41.000000000 -0800 @@ -1073,8 +1073,7 @@ config GEN_RTC precision in some cases. To compile this driver as a module, choose M here: the - module will be called genrtc. To load the module automatically - add 'alias char-major-10-135 genrtc' to your /etc/modules.conf + module will be called genrtc. config GEN_RTC_X bool "Extended RTC operation" --- linux-2.6.2-rc1/arch/ppc64/defconfig 2003-09-27 18:57:44.000000000 -0700 +++ 25/arch/ppc64/defconfig 2004-01-21 23:27:06.000000000 -0800 @@ -1,6 +1,7 @@ # # Automatically generated make config: don't edit # +CONFIG_64BIT=y CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_ISA_DMA=y @@ -14,7 +15,8 @@ CONFIG_FORCE_MAX_ZONEORDER=13 # Code maturity level options # CONFIG_EXPERIMENTAL=y -# CONFIG_BROKEN is not set +CONFIG_CLEAN_COMPILE=y +CONFIG_STANDALONE=y # # General setup @@ -33,6 +35,7 @@ CONFIG_EPOLL=y CONFIG_IOSCHED_NOOP=y CONFIG_IOSCHED_AS=y CONFIG_IOSCHED_DEADLINE=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set # # Loadable module support @@ -51,21 +54,23 @@ CONFIG_OBSOLETE_MODPARM=y CONFIG_PPC_PSERIES=y CONFIG_PPC=y CONFIG_PPC64=y +CONFIG_ALTIVEC=y +# CONFIG_POWER4_ONLY is not set CONFIG_SMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_NR_CPUS=32 # CONFIG_HMT is not set # CONFIG_DISCONTIGMEM is not set -# CONFIG_RTAS_FLASH is not set -CONFIG_SCANLOG=y CONFIG_PPC_RTAS=y +CONFIG_RTAS_FLASH=m +CONFIG_SCANLOG=m +CONFIG_LPARCFG=y # # General setup # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y -CONFIG_KCORE_ELF=y CONFIG_BINFMT_ELF=y # CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y @@ -75,6 +80,10 @@ CONFIG_PROC_DEVICETREE=y # CONFIG_CMDLINE_BOOL is not set # +# Device Drivers +# + +# # Generic Driver Options # @@ -117,6 +126,7 @@ CONFIG_BLK_DEV_INITRD=y # SCSI device support # CONFIG_SCSI=y +CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) @@ -145,9 +155,9 @@ CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_AIC7XXX is not set # CONFIG_SCSI_AIC7XXX_OLD is not set # CONFIG_SCSI_AIC79XX is not set -# CONFIG_SCSI_DPT_I2O is not set # CONFIG_SCSI_ADVANSYS is not set # CONFIG_SCSI_MEGARAID is not set +# CONFIG_SCSI_SATA is not set # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_CPQFCTS is not set # CONFIG_SCSI_DMX3191D is not set @@ -155,18 +165,21 @@ CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_EATA_PIO is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_IPS is not set # CONFIG_SCSI_INIA100 is not set CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 # CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set # CONFIG_SCSI_QLOGIC_ISP is not set # CONFIG_SCSI_QLOGIC_FC is not set # CONFIG_SCSI_QLOGIC_1280 is not set +CONFIG_SCSI_QLA2XXX_CONFIG=y +CONFIG_SCSI_QLA2XXX=y +CONFIG_SCSI_QLA21XX=y +CONFIG_SCSI_QLA22XX=y +CONFIG_SCSI_QLA23XX=y # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_NSP32 is not set @@ -181,6 +194,7 @@ CONFIG_MD_LINEAR=y CONFIG_MD_RAID0=y CONFIG_MD_RAID1=y CONFIG_MD_RAID5=y +# CONFIG_MD_RAID6 is not set # CONFIG_MD_MULTIPATH is not set CONFIG_BLK_DEV_DM=y CONFIG_DM_IOCTL_V4=y @@ -240,7 +254,9 @@ CONFIG_IPV6_SCTP__=y # CONFIG_IP_SCTP is not set # CONFIG_ATM is not set # CONFIG_VLAN_8021Q is not set -# CONFIG_LLC is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set # CONFIG_NET_DIVERT is not set @@ -268,7 +284,6 @@ CONFIG_DUMMY=m CONFIG_BONDING=m # CONFIG_EQUALIZER is not set CONFIG_TUN=m -# CONFIG_ETHERTAP is not set # # Ethernet (10 or 100Mbit) @@ -292,6 +307,7 @@ CONFIG_PCNET32=y # CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set # CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set # CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y @@ -303,7 +319,6 @@ CONFIG_E100=y # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set -# CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set # @@ -344,10 +359,10 @@ CONFIG_PPPOE=m # CONFIG_NET_RADIO is not set # -# Token Ring devices (depends on LLC=y) +# Token Ring devices # +# CONFIG_TR is not set # CONFIG_NET_FC is not set -# CONFIG_RCPCI is not set # CONFIG_SHAPER is not set # @@ -366,6 +381,11 @@ CONFIG_PPPOE=m # CONFIG_IRDA is not set # +# Bluetooth support +# +# CONFIG_BT is not set + +# # ISDN subsystem # # CONFIG_ISDN_BOOL is not set @@ -433,6 +453,7 @@ CONFIG_HW_CONSOLE=y # CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=4 # CONFIG_SERIAL_8250_EXTENDED is not set # @@ -445,20 +466,6 @@ CONFIG_UNIX98_PTY_COUNT=256 CONFIG_HVC_CONSOLE=y # -# I2C support -# -# CONFIG_I2C is not set - -# -# I2C Hardware Sensors Mainboard support -# - -# -# I2C Hardware Sensors Chip support -# -# CONFIG_I2C_SENSOR is not set - -# # Mice # # CONFIG_BUSMOUSE is not set @@ -483,11 +490,15 @@ CONFIG_HVC_CONSOLE=y # # Ftape, the floppy tape device driver # -# CONFIG_FTAPE is not set # CONFIG_AGP is not set # CONFIG_DRM is not set CONFIG_RAW_DRIVER=y -# CONFIG_HANGCHECK_TIMER is not set +CONFIG_MAX_RAW_DEVS=256 + +# +# I2C support +# +# CONFIG_I2C is not set # # Multimedia devices @@ -500,6 +511,67 @@ CONFIG_RAW_DRIVER=y # CONFIG_DVB is not set # +# Graphics support +# +CONFIG_FB=y +# CONFIG_FB_CYBER2000 is not set +CONFIG_FB_OF=y +# CONFIG_FB_CT65550 is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_S3TRIO is not set +# CONFIG_FB_VGA16 is not set +# CONFIG_FB_RIVA is not set +CONFIG_FB_MATROX=y +CONFIG_FB_MATROX_MILLENIUM=y +CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB_MATROX_G450=y +CONFIG_FB_MATROX_G100=y +CONFIG_FB_MATROX_MULTIHEAD=y +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_VIRTUAL is not set + +# +# Console display driver support +# +# CONFIG_VGA_CONSOLE is not set +# CONFIG_MDA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_PCI_CONSOLE=y +# CONFIG_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y + +# +# Logo configuration +# +CONFIG_LOGO=y +CONFIG_LOGO_LINUX_MONO=y +CONFIG_LOGO_LINUX_VGA16=y +CONFIG_LOGO_LINUX_CLUT224=y + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +# CONFIG_USB is not set +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_GADGET is not set + +# # File systems # CONFIG_EXT2_FS=y @@ -551,11 +623,14 @@ CONFIG_VFAT_FS=y # Pseudo filesystems # CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y # CONFIG_DEVFS_FS is not set CONFIG_DEVPTS_FS=y CONFIG_DEVPTS_FS_XATTR=y # CONFIG_DEVPTS_FS_SECURITY is not set CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y # @@ -580,6 +655,7 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_NFS_V4=y +# CONFIG_NFS_DIRECTIO is not set CONFIG_NFSD=y CONFIG_NFSD_V3=y CONFIG_NFSD_V4=y @@ -602,11 +678,11 @@ CONFIG_CIFS=m # # CONFIG_PARTITION_ADVANCED is not set CONFIG_MSDOS_PARTITION=y -CONFIG_NLS=y # # Native Language Support # +CONFIG_NLS=y CONFIG_NLS_DEFAULT="iso8859-1" # CONFIG_NLS_CODEPAGE_437 is not set # CONFIG_NLS_CODEPAGE_737 is not set @@ -647,72 +723,6 @@ CONFIG_NLS_DEFAULT="iso8859-1" # CONFIG_NLS_UTF8 is not set # -# Graphics support -# -CONFIG_FB=y -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -CONFIG_FB_OF=y -# CONFIG_FB_CT65550 is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_S3TRIO is not set -# CONFIG_FB_VGA16 is not set -# CONFIG_FB_RIVA is not set -CONFIG_FB_MATROX=y -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G450=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_MULTIHEAD=y -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_PM3 is not set -# CONFIG_FB_VIRTUAL is not set - -# -# Console display driver support -# -# CONFIG_VGA_CONSOLE is not set -# CONFIG_MDA_CONSOLE is not set -CONFIG_DUMMY_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_PCI_CONSOLE=y -# CONFIG_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Logo configuration -# -CONFIG_LOGO=y -CONFIG_LOGO_LINUX_MONO=y -CONFIG_LOGO_LINUX_VGA16=y -CONFIG_LOGO_LINUX_CLUT224=y - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -# CONFIG_USB is not set -# CONFIG_USB_GADGET is not set - -# -# Bluetooth support -# -# CONFIG_BT is not set - -# # Profiling support # CONFIG_PROFILING=y --- linux-2.6.2-rc1/arch/ppc64/Kconfig 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/ppc64/Kconfig 2004-01-21 23:27:05.000000000 -0800 @@ -286,6 +286,8 @@ source "drivers/input/Kconfig" source "drivers/char/Kconfig" +source "drivers/i2c/Kconfig" + source "drivers/media/Kconfig" source "fs/Kconfig" --- linux-2.6.2-rc1/arch/ppc64/kernel/lparcfg.c 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/ppc64/kernel/lparcfg.c 2004-01-21 23:27:05.000000000 -0800 @@ -5,6 +5,8 @@ * Copyright (c) 2003 Dave Engebretsen * Will Schmidt willschm@us.ibm.com * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation. + * Nathan Lynch nathanl@austin.ibm.com + * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -33,6 +35,9 @@ static struct proc_dir_entry *proc_ppc64 #define LPARCFG_BUFF_SIZE 4096 #ifdef CONFIG_PPC_ISERIES + +#define lparcfg_write NULL + static unsigned char e2a(unsigned char x) { switch (x) { @@ -383,6 +388,99 @@ static int lparcfg_data(unsigned char *b } return 0; } + +/* + * Interface for changing system parameters (variable capacity weight + * and entitled capacity). Format of input is "param_name=value"; + * anything after value is ignored. Valid parameters at this time are + * "partition_entitled_capacity" and "capacity_weight". We use + * H_SET_PPP to alter parameters. + * + * This function should be invoked only on systems with + * FW_FEATURE_SPLPAR. + */ +static ssize_t lparcfg_write(struct file *file, const char __user *buf, size_t count, loff_t *off) +{ + char *kbuf; + char *tmp; + u64 new_entitled, *new_entitled_ptr = &new_entitled; + u8 new_weight, *new_weight_ptr = &new_weight; + + unsigned long current_entitled; /* parameters for h_get_ppp */ + unsigned long dummy; + unsigned long resource; + u8 current_weight; + + ssize_t retval = -ENOMEM; + + kbuf = kmalloc(count, GFP_KERNEL); + if (!kbuf) + goto out; + + retval = -EFAULT; + if (copy_from_user(kbuf, buf, count)) + goto out; + + retval = -EINVAL; + kbuf[count - 1] = '\0'; + tmp = strchr(kbuf, '='); + if (!tmp) + goto out; + + *tmp++ = '\0'; + + if (!strcmp(kbuf, "partition_entitled_capacity")) { + char *endp; + *new_entitled_ptr = (u64)simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + goto out; + new_weight_ptr = ¤t_weight; + } else if (!strcmp(kbuf, "capacity_weight")) { + char *endp; + *new_weight_ptr = (u8)simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + goto out; + new_entitled_ptr = ¤t_entitled; + } else + goto out; + + /* Get our current parameters */ + retval = h_get_ppp(¤t_entitled, &dummy, &dummy, &resource); + if (retval) { + retval = -EIO; + goto out; + } + + current_weight = (resource>>5*8)&0xFF; + + pr_debug("%s: current_entitled = %lu, current_weight = %lu\n", + __FUNCTION__, current_entitled, current_weight); + + pr_debug("%s: new_entitled = %lu, new_weight = %lu\n", + __FUNCTION__, *new_entitled_ptr, *new_weight_ptr); + + retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr, + *new_weight_ptr); + + if (retval == H_Success || retval == H_Constrained) { + retval = count; + } else if (retval == H_Busy) { + retval = -EBUSY; + } else if (retval == H_Hardware) { + retval = -EIO; + } else if (retval == H_Parameter) { + retval = -EINVAL; + } else { + printk(KERN_WARNING "%s: received unknown hv return code %ld", + __FUNCTION__, retval); + retval = -EIO; + } + +out: + kfree(kbuf); + return retval; +} + #endif /* CONFIG_PPC_PSERIES */ @@ -442,8 +540,15 @@ struct file_operations lparcfg_fops = { int __init lparcfg_init(void) { struct proc_dir_entry *ent; + mode_t mode = S_IRUSR; + + /* Allow writing if we have FW_FEATURE_SPLPAR */ + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + lparcfg_fops.write = lparcfg_write; + mode |= S_IWUSR; + } - ent = create_proc_entry("ppc64/lparcfg", S_IRUSR, NULL); + ent = create_proc_entry("ppc64/lparcfg", mode, NULL); if (ent) { ent->proc_fops = &lparcfg_fops; ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL); --- linux-2.6.2-rc1/arch/ppc64/kernel/prom.c 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/ppc64/kernel/prom.c 2004-01-21 23:27:06.000000000 -0800 @@ -2560,6 +2560,7 @@ int of_remove_node(struct device_node *n return 0; } +#ifdef CONFIG_PROC_DEVICETREE /* * Add a node to /proc/device-tree. */ @@ -2592,6 +2593,17 @@ static void remove_node_proc_entries(str if (np->pde) remove_proc_entry(np->pde->name, parent->pde); } +#else /* !CONFIG_PROC_DEVICETREE */ +static void add_node_proc_entries(struct device_node *np) +{ + return; +} + +static void remove_node_proc_entries(struct device_node *np) +{ + return; +} +#endif /* CONFIG_PROC_DEVICETREE */ /* * Fix up the uninitialized fields in a new device node: --- linux-2.6.2-rc1/arch/ppc64/kernel/sys_ppc32.c 2004-01-20 21:51:17.000000000 -0800 +++ 25/arch/ppc64/kernel/sys_ppc32.c 2004-01-21 23:27:06.000000000 -0800 @@ -2011,6 +2011,8 @@ static int do_execve32(char * filename, int retval; int i; + sched_balance_exec(); + file = open_exec(filename); retval = PTR_ERR(file); @@ -2843,6 +2845,7 @@ long ppc32_timer_create(clockid_t clock, return -EFAULT; savefs = get_fs(); + set_fs(KERNEL_DS); err = sys_timer_create(clock, &event, &t); set_fs(savefs); --- linux-2.6.2-rc1/arch/ppc/8260_io/enet.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/ppc/8260_io/enet.c 2004-01-21 23:26:52.000000000 -0800 @@ -851,7 +851,7 @@ static int __init scc_enet_init(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.2-rc1/arch/ppc/8260_io/fcc_enet.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/ppc/8260_io/fcc_enet.c 2004-01-21 23:26:52.000000000 -0800 @@ -1371,7 +1371,7 @@ static int __init fec_enet_init(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.2-rc1/arch/ppc/8xx_io/enet.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/ppc/8xx_io/enet.c 2004-01-21 23:26:52.000000000 -0800 @@ -949,7 +949,7 @@ static int __init scc_enet_init(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.2-rc1/arch/ppc/8xx_io/fec.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/ppc/8xx_io/fec.c 2004-01-21 23:26:52.000000000 -0800 @@ -1748,7 +1748,7 @@ static int __init fec_enet_init(void) err = register_netdev(dev); if (err) { - kfree(dev); + free_netdev(dev); return err; } --- linux-2.6.2-rc1/arch/ppc/boot/ld.script 2003-11-09 16:45:05.000000000 -0800 +++ 25/arch/ppc/boot/ld.script 2004-01-22 00:01:19.000000000 -0800 @@ -82,6 +82,7 @@ SECTIONS *(__ksymtab) *(__ksymtab_strings) *(__bug_table) + *(__kcrctab) } } --- linux-2.6.2-rc1/arch/ppc/kernel/setup.c 2003-10-17 15:58:03.000000000 -0700 +++ 25/arch/ppc/kernel/setup.c 2004-01-21 20:11:14.000000000 -0800 @@ -675,7 +675,6 @@ void __init setup_arch(char **cmdline_p) if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); paging_init(); - sort_exception_table(); /* this is for modules since _machine can be a define -- Cort */ ppc_md.ppc_machine = _machine; --- linux-2.6.2-rc1/arch/ppc/kernel/signal.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/ppc/kernel/signal.c 2004-01-22 00:01:15.000000000 -0800 @@ -241,12 +241,16 @@ save_user_regs(struct pt_regs *regs, str * (except for MSR). */ static int -restore_user_regs(struct pt_regs *regs, struct mcontext __user *sr) +restore_user_regs(struct pt_regs *regs, struct mcontext __user *sr, int sig) { + unsigned long save_r2; #ifdef CONFIG_ALTIVEC unsigned long msr; #endif + /* backup/restore the TLS as we don't want it to be modified */ + if (!sig) + save_r2 = regs->gpr[2]; /* copy up to but not including MSR */ if (__copy_from_user(regs, &sr->mc_gregs, PT_MSR * sizeof(elf_greg_t))) return 1; @@ -254,6 +258,8 @@ restore_user_regs(struct pt_regs *regs, if (__copy_from_user(®s->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3], GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t))) return 1; + if (!sig) + regs->gpr[2] = save_r2; /* force the process to reload the FP registers from current->thread when it next does FP instructions */ @@ -359,7 +365,7 @@ badframe: force_sig(SIGSEGV, current); } -static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs) +static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int sig) { sigset_t set; struct mcontext *mcp; @@ -368,7 +374,7 @@ static int do_setcontext(struct ucontext || __get_user(mcp, &ucp->uc_regs)) return -EFAULT; restore_sigmask(&set); - if (restore_user_regs(regs, mcp)) + if (restore_user_regs(regs, mcp, sig)) return -EFAULT; return 0; @@ -376,10 +382,16 @@ static int do_setcontext(struct ucontext int sys_swapcontext(struct ucontext __user *old_ctx, struct ucontext __user *new_ctx, - int r5, int r6, int r7, int r8, struct pt_regs *regs) + int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) { unsigned char tmp; + /* Context size is for future use. Right now, we only make sure + * we are passed something we understand + */ + if (ctx_size < sizeof(struct ucontext)) + return -EINVAL; + if (old_ctx != NULL) { if (verify_area(VERIFY_WRITE, old_ctx, sizeof(*old_ctx)) || save_user_regs(regs, &old_ctx->uc_mcontext, 0) @@ -406,7 +418,7 @@ int sys_swapcontext(struct ucontext __us * or if another thread unmaps the region containing the context. * We kill the task with a SIGSEGV in this situation. */ - if (do_setcontext(new_ctx, regs)) + if (do_setcontext(new_ctx, regs, 0)) do_exit(SIGSEGV); sigreturn_exit(regs); /* doesn't actually return back to here */ @@ -425,7 +437,7 @@ int sys_rt_sigreturn(int r3, int r4, int (regs->gpr[1] + __SIGNAL_FRAMESIZE + 16); if (verify_area(VERIFY_READ, rt_sf, sizeof(struct rt_sigframe))) goto bad; - if (do_setcontext(&rt_sf->uc, regs)) + if (do_setcontext(&rt_sf->uc, regs, 1)) goto bad; /* @@ -484,7 +496,7 @@ handle_signal(unsigned long sig, struct if (save_user_regs(regs, &frame->mctx, __NR_sigreturn)) goto badframe; - if (put_user(regs->gpr[1], (unsigned long *)newsp)) + if (put_user(regs->gpr[1], (unsigned long __user *)newsp)) goto badframe; regs->gpr[1] = newsp; regs->gpr[3] = sig; @@ -529,7 +541,7 @@ int sys_sigreturn(int r3, int r4, int r5 sr = (struct mcontext *) sigctx.regs; if (verify_area(VERIFY_READ, sr, sizeof(*sr)) - || restore_user_regs(regs, sr)) + || restore_user_regs(regs, sr, 1)) goto badframe; sigreturn_exit(regs); /* doesn't return */ --- linux-2.6.2-rc1/arch/sparc64/defconfig 2004-01-20 21:51:18.000000000 -0800 +++ 25/arch/sparc64/defconfig 2004-01-21 20:11:14.000000000 -0800 @@ -379,6 +379,7 @@ CONFIG_MD_LINEAR=m CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m CONFIG_MD_RAID5=m +CONFIG_MD_RAID6=m CONFIG_MD_MULTIPATH=m CONFIG_BLK_DEV_DM=m # CONFIG_DM_IOCTL_V4 is not set @@ -969,7 +970,7 @@ CONFIG_PHONE=m CONFIG_PHONE_IXJ=m # -# Unix 98 PTY support +# Unix98 PTY support # CONFIG_UNIX98_PTYS=y CONFIG_UNIX98_PTY_COUNT=256 @@ -1054,16 +1055,21 @@ CONFIG_I2C_ALI1535=m CONFIG_I2C_ALI15X3=m CONFIG_I2C_AMD756=m CONFIG_I2C_AMD8111=m +CONFIG_I2C_ELV=m CONFIG_I2C_I801=m CONFIG_I2C_I810=m +CONFIG_I2C_ISA=m CONFIG_I2C_NFORCE2=m CONFIG_I2C_PHILIPSPAR=m +CONFIG_I2C_PARPORT=m +CONFIG_I2C_PARPORT_LIGHT=m CONFIG_I2C_PROSAVAGE=m CONFIG_I2C_SAVAGE4=m CONFIG_SCx200_ACB=m CONFIG_I2C_SIS5595=m CONFIG_I2C_SIS630=m CONFIG_I2C_SIS96X=m +CONFIG_I2C_VELLEMAN=m CONFIG_I2C_VIA=m CONFIG_I2C_VIAPRO=m CONFIG_I2C_VOODOO3=m @@ -1073,14 +1079,20 @@ CONFIG_I2C_VOODOO3=m # CONFIG_I2C_SENSOR=m CONFIG_SENSORS_ADM1021=m +CONFIG_SENSORS_ASB100=m CONFIG_SENSORS_EEPROM=m CONFIG_SENSORS_IT87=m CONFIG_SENSORS_LM75=m CONFIG_SENSORS_LM78=m CONFIG_SENSORS_LM83=m CONFIG_SENSORS_LM85=m +CONFIG_SENSORS_LM90=m CONFIG_SENSORS_VIA686A=m CONFIG_SENSORS_W83781D=m +CONFIG_SENSORS_W83L785TS=m +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set # # File systems @@ -1572,12 +1584,15 @@ CONFIG_USB_EZUSB=y # # USB Miscellaneous drivers # +CONFIG_USB_EMI62=m +CONFIG_USB_EMI26=m # CONFIG_USB_TIGL is not set CONFIG_USB_AUERSWALD=m CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m # CONFIG_USB_BRLVGER is not set CONFIG_USB_LCD=m +CONFIG_USB_LED=m CONFIG_USB_SPEEDTOUCH=m CONFIG_USB_TEST=m # CONFIG_USB_GADGET is not set --- linux-2.6.2-rc1/arch/sparc64/Kconfig 2004-01-20 21:51:18.000000000 -0800 +++ 25/arch/sparc64/Kconfig 2004-01-22 00:01:17.000000000 -0800 @@ -727,12 +727,19 @@ config STACK_DEBUG depends on DEBUG_KERNEL bool "Stack Overflow Detection Support" +config LOCKMETER + bool "Kernel lock metering" + depends on SMP && !PREEMPT + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + # We have a custom atomic_dec_and_lock() implementation but it's not # compatible with spinlock debugging so we need to fall back on # the generic version in that case. config HAVE_DEC_LOCK bool - depends on SMP && !DEBUG_SPINLOCK + depends on SMP && !DEBUG_SPINLOCK && !LOCKMETER default y config MCOUNT --- linux-2.6.2-rc1/arch/sparc64/kernel/sys_sparc32.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/sparc64/kernel/sys_sparc32.c 2004-01-21 20:11:14.000000000 -0800 @@ -2023,7 +2023,8 @@ out: security_bprm_free(&bprm); out_mm: - mmdrop(bprm.mm); + if (bprm.mm) + mmdrop(bprm.mm); out_file: if (bprm.file) { --- linux-2.6.2-rc1/arch/sparc64/lib/rwlock.S 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/sparc64/lib/rwlock.S 2004-01-22 00:01:17.000000000 -0800 @@ -85,5 +85,20 @@ __write_trylock_succeed: __write_trylock_fail: retl mov 0, %o0 + + .globl __read_trylock +__read_trylock: /* %o0 = lock_ptr */ + ldsw [%o0], %g5 + brlz,pn %g5, 100f + add %g5, 1, %g7 + cas [%o0], %g5, %g7 + cmp %g5, %g7 + bne,pn %icc, __read_trylock + membar #StoreLoad | #StoreStore + retl + mov 1, %o0 +100: retl + mov 0, %o0 + rwlock_impl_end: --- linux-2.6.2-rc1/arch/um/config.release 2003-08-08 22:55:11.000000000 -0700 +++ 25/arch/um/config.release 2004-01-21 23:30:29.000000000 -0800 @@ -228,7 +228,6 @@ CONFIG_ROMFS_FS=m CONFIG_EXT2_FS=y CONFIG_SYSV_FS=m CONFIG_UDF_FS=m -# CONFIG_UDF_RW is not set CONFIG_UFS_FS=m # CONFIG_UFS_FS_WRITE is not set --- linux-2.6.2-rc1/arch/um/defconfig 2003-08-08 22:55:11.000000000 -0700 +++ 25/arch/um/defconfig 2004-01-21 23:30:29.000000000 -0800 @@ -3,29 +3,19 @@ # CONFIG_USERMODE=y CONFIG_MMU=y -CONFIG_SWAP=y CONFIG_UID16=y CONFIG_RWSEM_GENERIC_SPINLOCK=y -CONFIG_CONFIG_LOG_BUF_SHIFT=14 # -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# General Setup +# UML-specific options # CONFIG_MODE_TT=y CONFIG_MODE_SKAS=y CONFIG_NET=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_BINFMT_AOUT=y CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=y CONFIG_HOSTFS=y +CONFIG_HPPFS=y CONFIG_MCONSOLE=y CONFIG_MAGIC_SYSRQ=y # CONFIG_HOST_2G_2G is not set @@ -34,14 +24,43 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_NEST_LEVEL=0 CONFIG_KERNEL_HALF_GIGS=1 # CONFIG_HIGHMEM is not set -CONFIG_PROC_MM=y +# CONFIG_PROC_MM is not set CONFIG_KERNEL_STACK_ORDER=2 +CONFIG_UML_REAL_TIME_CLOCK=y + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +CONFIG_STANDALONE=y +CONFIG_BROKEN_ON_SMP=y + +# +# General setup +# +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SYSCTL=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_IKCONFIG is not set +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y # # Loadable module support # -CONFIG_MODULES=y -# CONFIG_KMOD is not set +# CONFIG_MODULES is not set + +# +# Generic Driver Options +# # # Character Devices @@ -69,6 +88,7 @@ CONFIG_HOSTAUDIO=y # CONFIG_BLK_DEV_UBD=y # CONFIG_BLK_DEV_UBD_SYNC is not set +CONFIG_BLK_DEV_COW_COMMON=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y @@ -78,7 +98,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_NETDEVICES=y # -# Network Devices +# UML Network Devices # CONFIG_UML_NET=y CONFIG_UML_NET_ETHERTAP=y @@ -88,22 +108,6 @@ CONFIG_UML_NET_DAEMON=y CONFIG_UML_NET_MCAST=y # CONFIG_UML_NET_PCAP is not set CONFIG_UML_NET_SLIRP=y -CONFIG_DUMMY=y -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -CONFIG_TUN=y -# CONFIG_ETHERTAP is not set -CONFIG_PPP=y -# CONFIG_PPP_MULTILINK is not set -# CONFIG_PPP_ASYNC is not set -# CONFIG_PPP_SYNC_TTY is not set -# CONFIG_PPP_DEFLATE is not set -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -CONFIG_SLIP=y -# CONFIG_SLIP_COMPRESSED is not set -# CONFIG_SLIP_SMART is not set -# CONFIG_SLIP_MODE_SLIP6 is not set # # Networking support @@ -115,8 +119,6 @@ CONFIG_SLIP=y CONFIG_PACKET=y CONFIG_PACKET_MMAP=y # CONFIG_NETLINK_DEV is not set -# CONFIG_NETFILTER is not set -# CONFIG_FILTER is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y @@ -130,8 +132,11 @@ CONFIG_INET=y # CONFIG_SYN_COOKIES is not set # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set -# CONFIG_XFRM_USER is not set +# CONFIG_INET_IPCOMP is not set # CONFIG_IPV6 is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set +# CONFIG_NETFILTER is not set # # SCTP Configuration (EXPERIMENTAL) @@ -140,9 +145,9 @@ CONFIG_IPV6_SCTP__=y # CONFIG_IP_SCTP is not set # CONFIG_ATM is not set # CONFIG_VLAN_8021Q is not set -# CONFIG_LLC is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set # CONFIG_NET_DIVERT is not set @@ -160,6 +165,10 @@ CONFIG_IPV6_SCTP__=y # Network testing # # CONFIG_NET_PKTGEN is not set +CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=y # # Ethernet (10 or 100Mbit) @@ -171,12 +180,28 @@ CONFIG_IPV6_SCTP__=y # # +# Ethernet (10000 Mbit) +# +CONFIG_PPP=y +# CONFIG_PPP_MULTILINK is not set +# CONFIG_PPP_FILTER is not set +# CONFIG_PPP_ASYNC is not set +# CONFIG_PPP_SYNC_TTY is not set +# CONFIG_PPP_DEFLATE is not set +# CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPPOE is not set +CONFIG_SLIP=y +# CONFIG_SLIP_COMPRESSED is not set +# CONFIG_SLIP_SMART is not set +# CONFIG_SLIP_MODE_SLIP6 is not set + +# # Wireless LAN (non-hamradio) # # CONFIG_NET_RADIO is not set # -# Token Ring devices (depends on LLC=y) +# Token Ring devices # # CONFIG_SHAPER is not set @@ -186,68 +211,101 @@ CONFIG_IPV6_SCTP__=y # CONFIG_WAN is not set # +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# IrDA (infrared) support +# +# CONFIG_IRDA is not set + +# +# Bluetooth support +# +# CONFIG_BT is not set + +# # File systems # +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT3_FS is not set +# CONFIG_JBD is not set +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +# CONFIG_JFS_FS is not set +# CONFIG_XFS_FS is not set +CONFIG_MINIX_FS=y +# CONFIG_ROMFS_FS is not set CONFIG_QUOTA=y # CONFIG_QFMT_V1 is not set # CONFIG_QFMT_V2 is not set CONFIG_QUOTACTL=y -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_AUTOFS_FS=y +CONFIG_AUTOFS4_FS=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_DEVFS_FS=y +CONFIG_DEVFS_MOUNT=y +# CONFIG_DEVFS_DEBUG is not set +CONFIG_DEVPTS_FS=y +# CONFIG_DEVPTS_FS_XATTR is not set +# CONFIG_TMPFS is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y + +# +# Miscellaneous filesystems +# # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_HFS_FS is not set # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set -# CONFIG_EXT3_FS is not set -# CONFIG_JBD is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m # CONFIG_EFS_FS is not set CONFIG_JFFS_FS=y CONFIG_JFFS_FS_VERBOSE=0 -CONFIG_JFFS_PROC_FS=y # CONFIG_JFFS2_FS is not set # CONFIG_CRAMFS is not set -# CONFIG_TMPFS is not set -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=m -# CONFIG_JOLIET is not set -# CONFIG_ZISOFS is not set -# CONFIG_JFS_FS is not set -CONFIG_MINIX_FS=m # CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set # CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -CONFIG_DEVFS_FS=y -CONFIG_DEVFS_MOUNT=y -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y # CONFIG_QNX4FS_FS is not set -# CONFIG_ROMFS_FS is not set -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set # CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set # CONFIG_UFS_FS is not set -# CONFIG_XFS_FS is not set # # Network File Systems # -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set # CONFIG_NFS_FS is not set # CONFIG_NFSD is not set # CONFIG_EXPORTFS is not set -# CONFIG_CIFS is not set # CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set # CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set # CONFIG_AFS_FS is not set # @@ -317,28 +375,7 @@ CONFIG_NLS_DEFAULT="iso8859-1" # # SCSI support # -CONFIG_SCSI=y -CONFIG_GENERIC_ISA_DMA=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_SD_EXTRA_DEVS=40 -CONFIG_CHR_DEV_ST=y -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=2 -CONFIG_CHR_DEV_SG=y - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -CONFIG_SCSI_DEBUG_QUEUES=y -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_DEBUG=y +# CONFIG_SCSI is not set # # Multi-device support (RAID and LVM) @@ -360,6 +397,7 @@ CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y # CONFIG_FTL is not set # CONFIG_NFTL is not set +# CONFIG_INFTL is not set # # RAM/ROM/Flash chip drivers @@ -374,20 +412,21 @@ CONFIG_MTD_BLOCK=y # # Mapping drivers for chip access # +# CONFIG_MTD_COMPLEX_MAPPINGS is not set # # Self-contained MTD device drivers # # CONFIG_MTD_SLRAM is not set # CONFIG_MTD_MTDRAM is not set -CONFIG_MTD_BLKMTD=m +CONFIG_MTD_BLKMTD=y # # Disk-On-Chip Device Drivers # -# CONFIG_MTD_DOC1000 is not set # CONFIG_MTD_DOC2000 is not set # CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOC2001PLUS is not set # # NAND Flash Device Drivers --- linux-2.6.2-rc1/arch/um/drivers/chan_kern.c 2003-06-14 12:18:35.000000000 -0700 +++ 25/arch/um/drivers/chan_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include "chan_kern.h" @@ -265,6 +266,11 @@ static int one_chan_config_string(struct { int n = 0; + if(chan == NULL){ + CONFIG_CHUNK(str, size, n, "none", 1); + return(n); + } + CONFIG_CHUNK(str, size, n, chan->ops->type, 0); if(chan->dev == NULL){ @@ -420,7 +426,8 @@ int parse_chan_pair(char *str, struct li INIT_LIST_HEAD(chans); } - if((out = strchr(str, ',')) != NULL){ + out = strchr(str, ','); + if(out != NULL){ in = str; *out = '\0'; out++; @@ -475,12 +482,15 @@ void chan_interrupt(struct list_head *ch goto out; } err = chan->ops->read(chan->fd, &c, chan->data); - if(err > 0) tty_receive_char(tty, c); + if(err > 0) + tty_receive_char(tty, c); } while(err > 0); + if(err == 0) reactivate_fd(chan->fd, irq); if(err == -EIO){ if(chan->primary){ - if(tty != NULL) tty_hangup(tty); + if(tty != NULL) + tty_hangup(tty); line_disable(dev, irq); close_chan(chans); free_chan(chans); --- linux-2.6.2-rc1/arch/um/drivers/chan_user.c 2003-06-14 12:17:56.000000000 -0700 +++ 25/arch/um/drivers/chan_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -24,29 +23,27 @@ void generic_close(int fd, void *unused) { - close(fd); + os_close_file(fd); } int generic_read(int fd, char *c_out, void *unused) { int n; - n = read(fd, c_out, sizeof(*c_out)); - if(n < 0){ - if(errno == EAGAIN) return(0); - return(-errno); - } - else if(n == 0) return(-EIO); - return(1); + n = os_read_file(fd, c_out, sizeof(*c_out)); + + if(n == -EAGAIN) + return(0); + else if(n == 0) + return(-EIO); + return(n); } +/* XXX Trivial wrapper around os_write_file */ + int generic_write(int fd, const char *buf, int n, void *unused) { - int count; - - count = write(fd, buf, n); - if(count < 0) return(-errno); - return(count); + return(os_write_file(fd, buf, n)); } int generic_console_write(int fd, const char *buf, int n, void *unused) @@ -68,15 +65,18 @@ int generic_console_write(int fd, const int generic_window_size(int fd, void *unused, unsigned short *rows_out, unsigned short *cols_out) { - struct winsize size; - int ret = 0; + int rows, cols; + int ret; + + ret = os_window_size(fd, &rows, &cols); + if(ret < 0) + return(ret); + + ret = ((*rows_out != rows) || (*cols_out != cols)); + + *rows_out = rows; + *cols_out = cols; - if(ioctl(fd, TIOCGWINSZ, &size) == 0){ - ret = ((*rows_out != size.ws_row) || - (*cols_out != size.ws_col)); - *rows_out = size.ws_row; - *cols_out = size.ws_col; - } return(ret); } @@ -100,14 +100,16 @@ static int winch_thread(void *arg) struct winch_data *data = arg; sigset_t sigs; int pty_fd, pipe_fd; + int count, err; char c = 1; - close(data->close_me); + os_close_file(data->close_me); pty_fd = data->pty_fd; pipe_fd = data->pipe_fd; - if(write(pipe_fd, &c, sizeof(c)) != sizeof(c)) + count = os_write_file(pipe_fd, &c, sizeof(c)); + if(count != sizeof(c)) printk("winch_thread : failed to write synchronization " - "byte, errno = %d\n", errno); + "byte, err = %d\n", -count); signal(SIGWINCH, winch_handler); sigfillset(&sigs); @@ -123,26 +125,24 @@ static int winch_thread(void *arg) exit(1); } - if(ioctl(pty_fd, TIOCSCTTY, 0) < 0){ - printk("winch_thread : TIOCSCTTY failed, errno = %d\n", errno); - exit(1); - } - if(tcsetpgrp(pty_fd, os_getpid()) < 0){ - printk("winch_thread : tcsetpgrp failed, errno = %d\n", errno); + err = os_new_tty_pgrp(pty_fd, os_getpid()); + if(err < 0){ + printk("winch_thread : new_tty_pgrp failed, err = %d\n", -err); exit(1); } - if(read(pipe_fd, &c, sizeof(c)) != sizeof(c)) + count = os_read_file(pipe_fd, &c, sizeof(c)); + if(count != sizeof(c)) printk("winch_thread : failed to read synchronization byte, " - "errno = %d\n", errno); + "err = %d\n", -count); while(1){ pause(); - if(write(pipe_fd, &c, sizeof(c)) != sizeof(c)){ - printk("winch_thread : write failed, errno = %d\n", - errno); - } + count = os_write_file(pipe_fd, &c, sizeof(c)); + if(count != sizeof(c)) + printk("winch_thread : write failed, err = %d\n", + -count); } } @@ -154,8 +154,8 @@ static int winch_tramp(int fd, void *dev char c; err = os_pipe(fds, 1, 1); - if(err){ - printk("winch_tramp : os_pipe failed, errno = %d\n", -err); + if(err < 0){ + printk("winch_tramp : os_pipe failed, err = %d\n", -err); return(err); } @@ -168,12 +168,12 @@ static int winch_tramp(int fd, void *dev return(pid); } - close(fds[1]); + os_close_file(fds[1]); *fd_out = fds[0]; - n = read(fds[0], &c, sizeof(c)); + n = os_read_file(fds[0], &c, sizeof(c)); if(n != sizeof(c)){ printk("winch_tramp : failed to read synchronization byte\n"); - printk("read returned %d, errno = %d\n", n, errno); + printk("read failed, err = %d\n", -n); printk("fd %d will not support SIGWINCH\n", fd); *fd_out = -1; } @@ -183,20 +183,24 @@ static int winch_tramp(int fd, void *dev void register_winch(int fd, void *device_data) { int pid, thread, thread_fd; + int count; char c = 1; - if(!isatty(fd)) return; + if(!isatty(fd)) + return; pid = tcgetpgrp(fd); - if(!CHOOSE_MODE(is_tracer_winch(pid, fd, device_data), 0) && - (pid == -1)){ + if(!CHOOSE_MODE_PROC(is_tracer_winch, is_skas_winch, pid, fd, + device_data) && (pid == -1)){ thread = winch_tramp(fd, device_data, &thread_fd); if(fd != -1){ register_winch_irq(thread_fd, fd, thread, device_data); - if(write(thread_fd, &c, sizeof(c)) != sizeof(c)) + count = os_write_file(thread_fd, &c, sizeof(c)); + if(count != sizeof(c)) printk("register_winch : failed to write " - "synchronization byte\n"); + "synchronization byte, err = %d\n", + -count); } } } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/drivers/cow.h 2004-01-21 23:30:29.000000000 -0800 @@ -0,0 +1,41 @@ +#ifndef __COW_H__ +#define __COW_H__ + +#include + +#if __BYTE_ORDER == __BIG_ENDIAN +# define ntohll(x) (x) +# define htonll(x) (x) +#elif __BYTE_ORDER == __LITTLE_ENDIAN +# define ntohll(x) bswap_64(x) +# define htonll(x) bswap_64(x) +#else +#error "__BYTE_ORDER not defined" +#endif + +extern int init_cow_file(int fd, char *cow_file, char *backing_file, + int sectorsize, int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out); + +extern int file_reader(__u64 offset, char *buf, int len, void *arg); +extern int read_cow_header(int (*reader)(__u64, char *, int, void *), + void *arg, __u32 *version_out, + char **backing_file_out, time_t *mtime_out, + __u64 *size_out, int *sectorsize_out, + __u32 *align_out, int *bitmap_offset_out); + +extern int write_cow_header(char *cow_file, int fd, char *backing_file, + int sectorsize, int alignment, long long *size); + +extern void cow_sizes(int version, __u64 size, int sectorsize, int align, + int bitmap_offset, unsigned long *bitmap_len_out, + int *data_offset_out); + +#endif + +/* + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/drivers/cow_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -0,0 +1,630 @@ +#define COW_MAJOR 60 +#define MAJOR_NR COW_MAJOR + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "2_5compat.h" +#include "cow.h" +#include "ubd_user.h" + +#define COW_SHIFT 4 + +struct cow { + int count; + char *cow_path; + dev_t cow_dev; + struct block_device *cow_bdev; + char *backing_path; + dev_t backing_dev; + struct block_device *backing_bdev; + int sectorsize; + unsigned long *bitmap; + unsigned long bitmap_len; + int bitmap_offset; + int data_offset; + devfs_handle_t devfs; + struct semaphore sem; + struct semaphore io_sem; + atomic_t working; + spinlock_t io_lock; + struct buffer_head *bh; + struct buffer_head *bhtail; + void *end_io; +}; + +#define DEFAULT_COW { \ + .count = 0, \ + .cow_path = NULL, \ + .cow_dev = 0, \ + .backing_path = NULL, \ + .backing_dev = 0, \ + .bitmap = NULL, \ + .bitmap_len = 0, \ + .bitmap_offset = 0, \ + .data_offset = 0, \ + .devfs = NULL, \ + .working = ATOMIC_INIT(0), \ + .io_lock = SPIN_LOCK_UNLOCKED, \ +} + +#define MAX_DEV (8) +#define MAX_MINOR (MAX_DEV << COW_SHIFT) + +struct cow cow_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_COW }; + +/* Not modified by this driver */ +static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE }; +static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 }; + +/* Protected by cow_lock */ +static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 }; + +static struct hd_struct cow_part[MAX_MINOR] = + { [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } }; + +/* Protected by io_request_lock */ +static request_queue_t *cow_queue; + +static int cow_open(struct inode *inode, struct file *filp); +static int cow_release(struct inode * inode, struct file * file); +static int cow_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg); +static int cow_revalidate(kdev_t rdev); + +static struct block_device_operations cow_blops = { + .open = cow_open, + .release = cow_release, + .ioctl = cow_ioctl, + .revalidate = cow_revalidate, +}; + +/* Initialized in an initcall, and unchanged thereafter */ +devfs_handle_t cow_dir_handle; + +#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \ +{ \ + .major = maj, \ + .major_name = name, \ + .minor_shift = shift, \ + .max_p = 1 << shift, \ + .part = parts, \ + .sizes = bsizes, \ + .nr_real = max, \ + .real_devices = NULL, \ + .next = NULL, \ + .fops = blops, \ + .de_arr = NULL, \ + .flags = 0 \ +} + +static spinlock_t cow_lock = SPIN_LOCK_UNLOCKED; + +static struct gendisk cow_gendisk = INIT_GENDISK(MAJOR_NR, "cow", cow_part, + COW_SHIFT, sizes, MAX_DEV, + &cow_blops); + +static int cow_add(int n) +{ + struct cow *dev = &cow_dev[n]; + char name[sizeof("nnnnnn\0")]; + int err = -ENODEV; + + if(dev->cow_path == NULL) + goto out; + + sprintf(name, "%d", n); + dev->devfs = devfs_register(cow_dir_handle, name, DEVFS_FL_REMOVABLE, + MAJOR_NR, n << COW_SHIFT, S_IFBLK | + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, + &cow_blops, NULL); + + init_MUTEX_LOCKED(&dev->sem); + init_MUTEX(&dev->io_sem); + + return(0); + + out: + return(err); +} + +/* + * Add buffer_head to back of pending list + */ +static void cow_add_bh(struct cow *cow, struct buffer_head *bh) +{ + unsigned long flags; + + spin_lock_irqsave(&cow->io_lock, flags); + if(cow->bhtail != NULL){ + cow->bhtail->b_reqnext = bh; + cow->bhtail = bh; + } + else { + cow->bh = bh; + cow->bhtail = bh; + } + spin_unlock_irqrestore(&cow->io_lock, flags); +} + +/* +* Grab first pending buffer +*/ +static struct buffer_head *cow_get_bh(struct cow *cow) +{ + struct buffer_head *bh; + + spin_lock_irq(&cow->io_lock); + bh = cow->bh; + if(bh != NULL){ + if(bh == cow->bhtail) + cow->bhtail = NULL; + cow->bh = bh->b_reqnext; + bh->b_reqnext = NULL; + } + spin_unlock_irq(&cow->io_lock); + + return(bh); +} + +static void cow_handle_bh(struct cow *cow, struct buffer_head *bh, + struct buffer_head **cow_bh, int ncow_bh) +{ + int i; + + if(ncow_bh > 0) + ll_rw_block(WRITE, ncow_bh, cow_bh); + + for(i = 0; i < ncow_bh ; i++){ + wait_on_buffer(cow_bh[i]); + brelse(cow_bh[i]); + } + + ll_rw_block(WRITE, 1, &bh); + brelse(bh); +} + +static struct buffer_head *cow_new_bh(struct cow *dev, int sector) +{ + struct buffer_head *bh; + + sector = (dev->bitmap_offset + sector / 8) / dev->sectorsize; + bh = getblk(dev->cow_dev, sector, dev->sectorsize); + memcpy(bh->b_data, dev->bitmap + sector / (8 * sizeof(dev->bitmap[0])), + dev->sectorsize); + return(bh); +} + +/* Copied from loop.c, needed to avoid deadlocking in make_request. */ + +static int cow_thread(void *data) +{ + struct cow *dev = data; + struct buffer_head *bh; + + daemonize(); + exit_files(current); + + sprintf(current->comm, "cow%d", dev - cow_dev); + + spin_lock_irq(¤t->sigmask_lock); + sigfillset(¤t->blocked); + flush_signals(current); + spin_unlock_irq(¤t->sigmask_lock); + + atomic_inc(&dev->working); + + current->policy = SCHED_OTHER; + current->nice = -20; + + current->flags |= PF_NOIO; + + /* + * up sem, we are running + */ + up(&dev->sem); + + for(;;){ + int start, len, nbh, i, update_bitmap = 0; + struct buffer_head *cow_bh[2]; + + down_interruptible(&dev->io_sem); + /* + * could be upped because of tear-down, not because of + * pending work + */ + if(!atomic_read(&dev->working)) + break; + + bh = cow_get_bh(dev); + if(bh == NULL){ + printk(KERN_ERR "cow: missing bh\n"); + continue; + } + + start = bh->b_blocknr * bh->b_size / dev->sectorsize; + len = bh->b_size / dev->sectorsize; + for(i = 0; i < len ; i++){ + if(ubd_test_bit(start + i, + (unsigned char *) dev->bitmap)) + continue; + + update_bitmap = 1; + ubd_set_bit(start + i, (unsigned char *) dev->bitmap); + } + + cow_bh[0] = NULL; + cow_bh[1] = NULL; + nbh = 0; + if(update_bitmap){ + cow_bh[0] = cow_new_bh(dev, start); + nbh++; + if(start / dev->sectorsize != + (start + len) / dev->sectorsize){ + cow_bh[1] = cow_new_bh(dev, start + len); + nbh++; + } + } + + bh->b_dev = dev->cow_dev; + bh->b_blocknr += dev->data_offset / dev->sectorsize; + + cow_handle_bh(dev, bh, cow_bh, nbh); + + /* + * upped both for pending work and tear-down, lo_pending + * will hit zero then + */ + if(atomic_dec_and_test(&dev->working)) + break; + } + + up(&dev->sem); + return(0); +} + +static int cow_make_request(request_queue_t *q, int rw, struct buffer_head *bh) +{ + struct cow *dev; + int n, minor; + + minor = MINOR(bh->b_rdev); + n = minor >> COW_SHIFT; + dev = &cow_dev[n]; + + dev->end_io = NULL; + if(ubd_test_bit(bh->b_rsector, (unsigned char *) dev->bitmap)){ + bh->b_rdev = dev->cow_dev; + bh->b_rsector += dev->data_offset / dev->sectorsize; + } + else if(rw == WRITE){ + bh->b_dev = dev->cow_dev; + bh->b_blocknr += dev->data_offset / dev->sectorsize; + + cow_add_bh(dev, bh); + up(&dev->io_sem); + return(0); + } + else { + bh->b_rdev = dev->backing_dev; + } + + return(1); +} + +int cow_init(void) +{ + int i; + + cow_dir_handle = devfs_mk_dir (NULL, "cow", NULL); + if (devfs_register_blkdev(MAJOR_NR, "cow", &cow_blops)) { + printk(KERN_ERR "cow: unable to get major %d\n", MAJOR_NR); + return -1; + } + read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ + blksize_size[MAJOR_NR] = blk_sizes; + blk_size[MAJOR_NR] = sizes; + INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes); + + cow_queue = BLK_DEFAULT_QUEUE(MAJOR_NR); + blk_init_queue(cow_queue, NULL); + INIT_ELV(cow_queue, &cow_queue->elevator); + blk_queue_make_request(cow_queue, cow_make_request); + + add_gendisk(&cow_gendisk); + + for(i=0;i 0){ + n = (left > blocksize) ? blocksize : left; + + bh = bread(dev, block, (n < 512) ? 512 : n); + if(bh == NULL) + return(-EIO); + + n -= offset; + memcpy(&buf[cur], bh->b_data + offset, n); + block++; + left -= n; + cur += n; + offset = 0; + brelse(bh); + } + + return(count); +} + +static int cow_open(struct inode *inode, struct file *filp) +{ + int (*dev_ioctl)(struct inode *, struct file *, unsigned int, + unsigned long); + mm_segment_t fs; + struct cow *dev; + __u64 size; + __u32 version, align; + time_t mtime; + char *backing_file; + int n, offset, err = 0; + + n = DEVICE_NR(inode->i_rdev); + if(n >= MAX_DEV) + return(-ENODEV); + dev = &cow_dev[n]; + offset = n << COW_SHIFT; + + spin_lock(&cow_lock); + + if(dev->count == 0){ + dev->cow_dev = name_to_kdev_t(dev->cow_path); + if(dev->cow_dev == 0){ + printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") " + "failed\n", dev->cow_path); + err = -ENODEV; + } + + dev->backing_dev = name_to_kdev_t(dev->backing_path); + if(dev->backing_dev == 0){ + printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") " + "failed\n", dev->backing_path); + err = -ENODEV; + } + + if(err) + goto out; + + dev->cow_bdev = bdget(dev->cow_dev); + if(dev->cow_bdev == NULL){ + printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", + dev->cow_path); + err = -ENOMEM; + } + dev->backing_bdev = bdget(dev->backing_dev); + if(dev->backing_bdev == NULL){ + printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", + dev->backing_path); + err = -ENOMEM; + } + + if(err) + goto out; + + err = blkdev_get(dev->cow_bdev, FMODE_READ|FMODE_WRITE, 0, + BDEV_RAW); + if(err){ + printk("cow_open - blkdev_get of COW device failed, " + "error = %d\n", err); + goto out; + } + + err = blkdev_get(dev->backing_bdev, FMODE_READ, 0, BDEV_RAW); + if(err){ + printk("cow_open - blkdev_get of backing device " + "failed, error = %d\n", err); + goto out; + } + + err = read_cow_header(reader, &dev->cow_dev, &version, + &backing_file, &mtime, &size, + &dev->sectorsize, &align, + &dev->bitmap_offset); + if(err){ + printk(KERN_ERR "cow_open - read_cow_header failed, " + "err = %d\n", err); + goto out; + } + + cow_sizes(version, size, dev->sectorsize, align, + dev->bitmap_offset, &dev->bitmap_len, + &dev->data_offset); + dev->bitmap = (void *) vmalloc(dev->bitmap_len); + if(dev->bitmap == NULL){ + err = -ENOMEM; + printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); + goto out; + } + flush_tlb_kernel_vm(); + + err = reader(dev->bitmap_offset, (char *) dev->bitmap, + dev->bitmap_len, &dev->cow_dev); + if(err < 0){ + printk(KERN_ERR "Failed to read COW bitmap\n"); + vfree(dev->bitmap); + goto out; + } + + dev_ioctl = dev->backing_bdev->bd_op->ioctl; + fs = get_fs(); + set_fs(KERNEL_DS); + err = (*dev_ioctl)(inode, filp, BLKGETSIZE, + (unsigned long) &sizes[offset]); + set_fs(fs); + if(err){ + printk(KERN_ERR "cow_open - BLKGETSIZE failed, " + "error = %d\n", err); + goto out; + } + + kernel_thread(cow_thread, dev, + CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + down(&dev->sem); + } + dev->count++; + out: + spin_unlock(&cow_lock); + return(err); +} + +static int cow_release(struct inode * inode, struct file * file) +{ + struct cow *dev; + int n, err; + + n = DEVICE_NR(inode->i_rdev); + if(n >= MAX_DEV) + return(-ENODEV); + dev = &cow_dev[n]; + + spin_lock(&cow_lock); + + if(--dev->count > 0) + goto out; + + err = blkdev_put(dev->cow_bdev, BDEV_RAW); + if(err) + printk("cow_release - blkdev_put of cow device failed, " + "error = %d\n", err); + bdput(dev->cow_bdev); + dev->cow_bdev = 0; + + err = blkdev_put(dev->backing_bdev, BDEV_RAW); + if(err) + printk("cow_release - blkdev_put of backing device failed, " + "error = %d\n", err); + bdput(dev->backing_bdev); + dev->backing_bdev = 0; + + out: + spin_unlock(&cow_lock); + return(0); +} + +static int cow_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg) +{ + struct cow *dev; + int (*dev_ioctl)(struct inode *, struct file *, unsigned int, + unsigned long); + int n; + + n = DEVICE_NR(inode->i_rdev); + if(n >= MAX_DEV) + return(-ENODEV); + dev = &cow_dev[n]; + + dev_ioctl = dev->backing_bdev->bd_op->ioctl; + return((*dev_ioctl)(inode, file, cmd, arg)); +} + +static int cow_revalidate(kdev_t rdev) +{ + printk(KERN_ERR "Need to implement cow_revalidate\n"); + return(0); +} + +static int parse_unit(char **ptr) +{ + char *str = *ptr, *end; + int n = -1; + + if(isdigit(*str)) { + n = simple_strtoul(str, &end, 0); + if(end == str) + return(-1); + *ptr = end; + } + else if (('a' <= *str) && (*str <= 'h')) { + n = *str - 'a'; + str++; + *ptr = str; + } + return(n); +} + +static int cow_setup(char *str) +{ + struct cow *dev; + char *cow_name, *backing_name; + int unit; + + unit = parse_unit(&str); + if(unit < 0){ + printk(KERN_ERR "cow_setup - Couldn't parse unit number\n"); + return(1); + } + + if(*str != '='){ + printk(KERN_ERR "cow_setup - Missing '=' after unit " + "number\n"); + return(1); + } + str++; + + cow_name = str; + backing_name = strchr(str, ','); + if(backing_name == NULL){ + printk(KERN_ERR "cow_setup - missing backing device name\n"); + return(0); + } + *backing_name = '\0'; + backing_name++; + + spin_lock(&cow_lock); + + dev = &cow_dev[unit]; + dev->cow_path = cow_name; + dev->backing_path = backing_name; + + spin_unlock(&cow_lock); + return(0); +} + +__setup("cow", cow_setup); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/drivers/cow_sys.h 2004-01-21 23:30:29.000000000 -0800 @@ -0,0 +1,48 @@ +#ifndef __COW_SYS_H__ +#define __COW_SYS_H__ + +#include "kern_util.h" +#include "user_util.h" +#include "os.h" +#include "user.h" + +static inline void *cow_malloc(int size) +{ + return(um_kmalloc(size)); +} + +static inline void cow_free(void *ptr) +{ + kfree(ptr); +} + +#define cow_printf printk + +static inline char *cow_strdup(char *str) +{ + return(uml_strdup(str)); +} + +static inline int cow_seek_file(int fd, __u64 offset) +{ + return(os_seek_file(fd, offset)); +} + +static inline int cow_file_size(char *file, __u64 *size_out) +{ + return(os_file_size(file, size_out)); +} + +static inline int cow_write_file(int fd, char *buf, int size) +{ + return(os_write_file(fd, buf, size)); +} + +#endif + +/* + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/drivers/cow_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -0,0 +1,375 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "os.h" + +#include "cow.h" +#include "cow_sys.h" + +#define PATH_LEN_V1 256 + +struct cow_header_v1 { + int magic; + int version; + char backing_file[PATH_LEN_V1]; + time_t mtime; + __u64 size; + int sectorsize; +}; + +#define PATH_LEN_V2 MAXPATHLEN + +struct cow_header_v2 { + unsigned long magic; + unsigned long version; + char backing_file[PATH_LEN_V2]; + time_t mtime; + __u64 size; + int sectorsize; +}; + +/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in + * case other systems have different values for MAXPATHLEN + */ +#define PATH_LEN_V3 4096 + +/* Changes from V2 - + * PATH_LEN_V3 as described above + * Explicitly specify field bit lengths for systems with different + * lengths for the usual C types. Not sure whether char or + * time_t should be changed, this can be changed later without + * breaking compatibility + * Add alignment field so that different alignments can be used for the + * bitmap and data + * Add cow_format field to allow for the possibility of different ways + * of specifying the COW blocks. For now, the only value is 0, + * for the traditional COW bitmap. + * Move the backing_file field to the end of the header. This allows + * for the possibility of expanding it into the padding required + * by the bitmap alignment. + * The bitmap and data portions of the file will be aligned as specified + * by the alignment field. This is to allow COW files to be + * put on devices with restrictions on access alignments, such as + * /dev/raw, with a 512 byte alignment restriction. This also + * allows the data to be more aligned more strictly than on + * sector boundaries. This is needed for ubd-mmap, which needs + * the data to be page aligned. + * Fixed (finally!) the rounding bug + */ + +struct cow_header_v3 { + __u32 magic; + __u32 version; + time_t mtime; + __u64 size; + __u32 sectorsize; + __u32 alignment; + __u32 cow_format; + char backing_file[PATH_LEN_V3]; +}; + +/* COW format definitions - for now, we have only the usual COW bitmap */ +#define COW_BITMAP 0 + +union cow_header { + struct cow_header_v1 v1; + struct cow_header_v2 v2; + struct cow_header_v3 v3; +}; + +#define COW_MAGIC 0x4f4f4f4d /* MOOO */ +#define COW_VERSION 3 + +#define DIV_ROUND(x, len) (((x) + (len) - 1) / (len)) +#define ROUND_UP(x, align) DIV_ROUND(x, align) * (align) + +void cow_sizes(int version, __u64 size, int sectorsize, int align, + int bitmap_offset, unsigned long *bitmap_len_out, + int *data_offset_out) +{ + if(version < 3){ + *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); + + *data_offset_out = bitmap_offset + *bitmap_len_out; + *data_offset_out = (*data_offset_out + sectorsize - 1) / + sectorsize; + *data_offset_out *= sectorsize; + } + else { + *bitmap_len_out = DIV_ROUND(size, sectorsize); + *bitmap_len_out = DIV_ROUND(*bitmap_len_out, 8); + + *data_offset_out = bitmap_offset + *bitmap_len_out; + *data_offset_out = ROUND_UP(*data_offset_out, align); + } +} + +static int absolutize(char *to, int size, char *from) +{ + char save_cwd[256], *slash; + int remaining; + + if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { + cow_printf("absolutize : unable to get cwd - errno = %d\n", + errno); + return(-1); + } + slash = strrchr(from, '/'); + if(slash != NULL){ + *slash = '\0'; + if(chdir(from)){ + *slash = '/'; + cow_printf("absolutize : Can't cd to '%s' - " + "errno = %d\n", from, errno); + return(-1); + } + *slash = '/'; + if(getcwd(to, size) == NULL){ + cow_printf("absolutize : unable to get cwd of '%s' - " + "errno = %d\n", from, errno); + return(-1); + } + remaining = size - strlen(to); + if(strlen(slash) + 1 > remaining){ + cow_printf("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return(-1); + } + strcat(to, slash); + } + else { + if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ + cow_printf("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return(-1); + } + strcpy(to, save_cwd); + strcat(to, "/"); + strcat(to, from); + } + chdir(save_cwd); + return(0); +} + +int write_cow_header(char *cow_file, int fd, char *backing_file, + int sectorsize, int alignment, long long *size) +{ + struct cow_header_v3 *header; + unsigned long modtime; + int err; + + err = cow_seek_file(fd, 0); + if(err < 0){ + cow_printf("write_cow_header - lseek failed, err = %d\n", -err); + goto out; + } + + err = -ENOMEM; + header = cow_malloc(sizeof(*header)); + if(header == NULL){ + cow_printf("Failed to allocate COW V3 header\n"); + goto out; + } + header->magic = htonl(COW_MAGIC); + header->version = htonl(COW_VERSION); + + err = -EINVAL; + if(strlen(backing_file) > sizeof(header->backing_file) - 1){ + cow_printf("Backing file name \"%s\" is too long - names are " + "limited to %d characters\n", backing_file, + sizeof(header->backing_file) - 1); + goto out_free; + } + + if(absolutize(header->backing_file, sizeof(header->backing_file), + backing_file)) + goto out_free; + + err = os_file_modtime(header->backing_file, &modtime); + if(err < 0){ + cow_printf("Backing file '%s' mtime request failed, " + "err = %d\n", header->backing_file, -err); + goto out_free; + } + + err = cow_file_size(header->backing_file, size); + if(err < 0){ + cow_printf("Couldn't get size of backing file '%s', " + "err = %d\n", header->backing_file, -err); + goto out_free; + } + + header->mtime = htonl(modtime); + header->size = htonll(*size); + header->sectorsize = htonl(sectorsize); + header->alignment = htonl(alignment); + header->cow_format = COW_BITMAP; + + err = os_write_file(fd, header, sizeof(*header)); + if(err != sizeof(*header)){ + cow_printf("Write of header to new COW file '%s' failed, " + "err = %d\n", cow_file, -err); + goto out_free; + } + err = 0; + out_free: + cow_free(header); + out: + return(err); +} + +int file_reader(__u64 offset, char *buf, int len, void *arg) +{ + int fd = *((int *) arg); + + return(pread(fd, buf, len, offset)); +} + +/* XXX Need to sanity-check the values read from the header */ + +int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, + __u32 *version_out, char **backing_file_out, + time_t *mtime_out, __u64 *size_out, + int *sectorsize_out, __u32 *align_out, + int *bitmap_offset_out) +{ + union cow_header *header; + char *file; + int err, n; + unsigned long version, magic; + + header = cow_malloc(sizeof(*header)); + if(header == NULL){ + cow_printf("read_cow_header - Failed to allocate header\n"); + return(-ENOMEM); + } + err = -EINVAL; + n = (*reader)(0, (char *) header, sizeof(*header), arg); + if(n < offsetof(typeof(header->v1), backing_file)){ + cow_printf("read_cow_header - short header\n"); + goto out; + } + + magic = header->v1.magic; + if(magic == COW_MAGIC) { + version = header->v1.version; + } + else if(magic == ntohl(COW_MAGIC)){ + version = ntohl(header->v1.version); + } + /* No error printed because the non-COW case comes through here */ + else goto out; + + *version_out = version; + + if(version == 1){ + if(n < sizeof(header->v1)){ + cow_printf("read_cow_header - failed to read V1 " + "header\n"); + goto out; + } + *mtime_out = header->v1.mtime; + *size_out = header->v1.size; + *sectorsize_out = header->v1.sectorsize; + *bitmap_offset_out = sizeof(header->v1); + *align_out = *sectorsize_out; + file = header->v1.backing_file; + } + else if(version == 2){ + if(n < sizeof(header->v2)){ + cow_printf("read_cow_header - failed to read V2 " + "header\n"); + goto out; + } + *mtime_out = ntohl(header->v2.mtime); + *size_out = ntohll(header->v2.size); + *sectorsize_out = ntohl(header->v2.sectorsize); + *bitmap_offset_out = sizeof(header->v2); + *align_out = *sectorsize_out; + file = header->v2.backing_file; + } + else if(version == 3){ + if(n < sizeof(header->v3)){ + cow_printf("read_cow_header - failed to read V2 " + "header\n"); + goto out; + } + *mtime_out = ntohl(header->v3.mtime); + *size_out = ntohll(header->v3.size); + *sectorsize_out = ntohl(header->v3.sectorsize); + *align_out = ntohl(header->v3.alignment); + *bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out); + file = header->v3.backing_file; + } + else { + cow_printf("read_cow_header - invalid COW version\n"); + goto out; + } + err = -ENOMEM; + *backing_file_out = cow_strdup(file); + if(*backing_file_out == NULL){ + cow_printf("read_cow_header - failed to allocate backing " + "file\n"); + goto out; + } + err = 0; + out: + cow_free(header); + return(err); +} + +int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize, + int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out) +{ + __u64 size, offset; + char zero = 0; + int err; + + err = write_cow_header(cow_file, fd, backing_file, sectorsize, + alignment, &size); + if(err) + goto out; + + *bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment); + cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out, + bitmap_len_out, data_offset_out); + + offset = *data_offset_out + size - sizeof(zero); + err = cow_seek_file(fd, offset); + if(err < 0){ + cow_printf("cow bitmap lseek failed : err = %d\n", -err); + goto out; + } + + /* does not really matter how much we write it is just to set EOF + * this also sets the entire COW bitmap + * to zero without having to allocate it + */ + err = cow_write_file(fd, &zero, sizeof(zero)); + if(err != sizeof(zero)){ + cow_printf("Write of bitmap to new COW file '%s' failed, " + "err = %d\n", cow_file, -err); + err = -EINVAL; + goto out; + } + + return(0); + + out: + return(err); +} + +/* + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.2-rc1/arch/um/drivers/daemon_user.c 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/um/drivers/daemon_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -53,7 +53,8 @@ static int connect_to_switch(struct daem struct request_v3 req; int fd, n, err; - if((pri->control = socket(AF_UNIX, SOCK_STREAM, 0)) < 0){ + pri->control = socket(AF_UNIX, SOCK_STREAM, 0); + if(pri->control < 0){ printk("daemon_open : control socket failed, errno = %d\n", errno); return(-errno); @@ -67,7 +68,8 @@ static int connect_to_switch(struct daem goto out; } - if((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0){ + fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if(fd < 0){ printk("daemon_open : data socket failed, errno = %d\n", errno); err = -errno; @@ -91,18 +93,18 @@ static int connect_to_switch(struct daem req.version = SWITCH_VERSION; req.type = REQ_NEW_CONTROL; req.sock = *local_addr; - n = write(pri->control, &req, sizeof(req)); + n = os_write_file(pri->control, &req, sizeof(req)); if(n != sizeof(req)){ - printk("daemon_open : control setup request returned %d, " - "errno = %d\n", n, errno); + printk("daemon_open : control setup request failed, err = %d\n", + -n); err = -ENOTCONN; goto out; } - n = read(pri->control, sun, sizeof(*sun)); + n = os_read_file(pri->control, sun, sizeof(*sun)); if(n != sizeof(*sun)){ - printk("daemon_open : read of data socket returned %d, " - "errno = %d\n", n, errno); + printk("daemon_open : read of data socket failed, err = %d\n", + -n); err = -ENOTCONN; goto out_close; } @@ -111,9 +113,9 @@ static int connect_to_switch(struct daem return(fd); out_close: - close(fd); + os_close_file(fd); out: - close(pri->control); + os_close_file(pri->control); return(err); } @@ -153,8 +155,8 @@ static void daemon_remove(void *data) { struct daemon_data *pri = data; - close(pri->fd); - close(pri->control); + os_close_file(pri->fd); + os_close_file(pri->control); if(pri->data_addr != NULL) kfree(pri->data_addr); if(pri->ctl_addr != NULL) kfree(pri->ctl_addr); if(pri->local_addr != NULL) kfree(pri->local_addr); --- linux-2.6.2-rc1/arch/um/drivers/fd.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/um/drivers/fd.c 2004-01-21 23:30:29.000000000 -0800 @@ -35,7 +35,8 @@ void *fd_init(char *str, int device, str printk("fd_init : couldn't parse file descriptor '%s'\n", str); return(NULL); } - if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); + data = um_kmalloc(sizeof(*data)); + if(data == NULL) return(NULL); *data = ((struct fd_chan) { .fd = n, .raw = opts->raw }); return(data); --- linux-2.6.2-rc1/arch/um/drivers/harddog_user.c 2003-06-14 12:18:06.000000000 -0700 +++ 25/arch/um/drivers/harddog_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -27,10 +27,10 @@ static void pre_exec(void *d) dup2(data->stdin, 0); dup2(data->stdout, 1); dup2(data->stdout, 2); - close(data->stdin); - close(data->stdout); - close(data->close_me[0]); - close(data->close_me[1]); + os_close_file(data->stdin); + os_close_file(data->stdout); + os_close_file(data->close_me[0]); + os_close_file(data->close_me[1]); } int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock) @@ -44,15 +44,15 @@ int start_watchdog(int *in_fd_ret, int * char **args = NULL; err = os_pipe(in_fds, 1, 0); - if(err){ - printk("harddog_open - os_pipe failed, errno = %d\n", -err); - return(err); + if(err < 0){ + printk("harddog_open - os_pipe failed, err = %d\n", -err); + goto out; } err = os_pipe(out_fds, 1, 0); - if(err){ - printk("harddog_open - os_pipe failed, errno = %d\n", -err); - return(err); + if(err < 0){ + printk("harddog_open - os_pipe failed, err = %d\n", -err); + goto out_close_in; } data.stdin = out_fds[0]; @@ -72,42 +72,47 @@ int start_watchdog(int *in_fd_ret, int * pid = run_helper(pre_exec, &data, args, NULL); - close(out_fds[0]); - close(in_fds[1]); + os_close_file(out_fds[0]); + os_close_file(in_fds[1]); if(pid < 0){ err = -pid; - printk("harddog_open - run_helper failed, errno = %d\n", err); - goto out; + printk("harddog_open - run_helper failed, errno = %d\n", -err); + goto out_close_out; } - n = read(in_fds[0], &c, sizeof(c)); + n = os_read_file(in_fds[0], &c, sizeof(c)); if(n == 0){ printk("harddog_open - EOF on watchdog pipe\n"); helper_wait(pid); err = -EIO; - goto out; + goto out_close_out; } else if(n < 0){ printk("harddog_open - read of watchdog pipe failed, " - "errno = %d\n", errno); + "err = %d\n", -n); helper_wait(pid); - err = -errno; - goto out; + err = n; + goto out_close_out; } *in_fd_ret = in_fds[0]; *out_fd_ret = out_fds[1]; return(0); + + out_close_in: + os_close_file(in_fds[0]); + os_close_file(in_fds[1]); + out_close_out: + os_close_file(out_fds[0]); + os_close_file(out_fds[1]); out: - close(out_fds[1]); - close(in_fds[0]); return(err); } void stop_watchdog(int in_fd, int out_fd) { - close(in_fd); - close(out_fd); + os_close_file(in_fd); + os_close_file(out_fd); } int ping_watchdog(int fd) @@ -115,11 +120,12 @@ int ping_watchdog(int fd) int n; char c = '\n'; - n = write(fd, &c, sizeof(c)); - if(n < sizeof(c)){ - printk("ping_watchdog - write failed, errno = %d\n", - errno); - return(-errno); + n = os_write_file(fd, &c, sizeof(c)); + if(n != sizeof(c)){ + printk("ping_watchdog - write failed, err = %d\n", -n); + if(n < 0) + return(n); + return(-EIO); } return 1; --- linux-2.6.2-rc1/arch/um/drivers/hostaudio_kern.c 2003-06-14 12:18:35.000000000 -0700 +++ 25/arch/um/drivers/hostaudio_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -5,12 +5,12 @@ #include "linux/config.h" #include "linux/module.h" -#include "linux/version.h" #include "linux/init.h" #include "linux/slab.h" #include "linux/fs.h" #include "linux/sound.h" #include "linux/soundcard.h" +#include "asm/uaccess.h" #include "kern_util.h" #include "init.h" #include "hostaudio.h" @@ -19,30 +19,39 @@ char *dsp = HOSTAUDIO_DEV_DSP; char *mixer = HOSTAUDIO_DEV_MIXER; +#define DSP_HELP \ +" This is used to specify the host dsp device to the hostaudio driver.\n" \ +" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" + +#define MIXER_HELP \ +" This is used to specify the host mixer device to the hostaudio driver.\n" \ +" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" + #ifndef MODULE static int set_dsp(char *name, int *add) { - dsp = uml_strdup(name); + dsp = name; return(0); } -__uml_setup("dsp=", set_dsp, -"dsp=\n" -" This is used to specify the host dsp device to the hostaudio driver.\n" -" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" -); +__uml_setup("dsp=", set_dsp, "dsp=\n" DSP_HELP); static int set_mixer(char *name, int *add) { - mixer = uml_strdup(name); + mixer = name; return(0); } -__uml_setup("mixer=", set_mixer, -"mixer=\n" -" This is used to specify the host mixer device to the hostaudio driver.\n" -" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" -); +__uml_setup("mixer=", set_mixer, "mixer=\n" MIXER_HELP); + +#else /*MODULE*/ + +MODULE_PARM(dsp, "s"); +MODULE_PARM_DESC(dsp, DSP_HELP); + +MODULE_PARM(mixer, "s"); +MODULE_PARM_DESC(mixer, MIXER_HELP); + #endif /* /dev/dsp file operations */ @@ -51,23 +60,55 @@ static ssize_t hostaudio_read(struct fil loff_t *ppos) { struct hostaudio_state *state = file->private_data; + void *kbuf; + int err; #ifdef DEBUG printk("hostaudio: read called, count = %d\n", count); #endif - return(hostaudio_read_user(state, buffer, count, ppos)); + kbuf = kmalloc(count, GFP_KERNEL); + if(kbuf == NULL) + return(-ENOMEM); + + err = hostaudio_read_user(state, kbuf, count, ppos); + if(err < 0) + goto out; + + if(copy_to_user(buffer, kbuf, err)) + err = -EFAULT; + + out: + kfree(kbuf); + return(err); } static ssize_t hostaudio_write(struct file *file, const char *buffer, size_t count, loff_t *ppos) { struct hostaudio_state *state = file->private_data; + void *kbuf; + int err; #ifdef DEBUG printk("hostaudio: write called, count = %d\n", count); #endif - return(hostaudio_write_user(state, buffer, count, ppos)); + + kbuf = kmalloc(count, GFP_KERNEL); + if(kbuf == NULL) + return(-ENOMEM); + + err = -EFAULT; + if(copy_from_user(kbuf, buffer, count)) + goto out; + + err = hostaudio_write_user(state, kbuf, count, ppos); + if(err < 0) + goto out; + + out: + kfree(kbuf); + return(err); } static unsigned int hostaudio_poll(struct file *file, @@ -86,12 +127,43 @@ static int hostaudio_ioctl(struct inode unsigned int cmd, unsigned long arg) { struct hostaudio_state *state = file->private_data; + unsigned long data = 0; + int err; #ifdef DEBUG printk("hostaudio: ioctl called, cmd = %u\n", cmd); #endif + switch(cmd){ + case SNDCTL_DSP_SPEED: + case SNDCTL_DSP_STEREO: + case SNDCTL_DSP_GETBLKSIZE: + case SNDCTL_DSP_CHANNELS: + case SNDCTL_DSP_SUBDIVIDE: + case SNDCTL_DSP_SETFRAGMENT: + if(get_user(data, (int *) arg)) + return(-EFAULT); + break; + default: + break; + } + + err = hostaudio_ioctl_user(state, cmd, (unsigned long) &data); + + switch(cmd){ + case SNDCTL_DSP_SPEED: + case SNDCTL_DSP_STEREO: + case SNDCTL_DSP_GETBLKSIZE: + case SNDCTL_DSP_CHANNELS: + case SNDCTL_DSP_SUBDIVIDE: + case SNDCTL_DSP_SETFRAGMENT: + if(put_user(data, (int *) arg)) + return(-EFAULT); + break; + default: + break; + } - return(hostaudio_ioctl_user(state, cmd, arg)); + return(err); } static int hostaudio_open(struct inode *inode, struct file *file) @@ -225,7 +297,8 @@ MODULE_LICENSE("GPL"); static int __init hostaudio_init_module(void) { - printk(KERN_INFO "UML Audio Relay\n"); + printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n", + dsp, mixer); module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1); if(module_data.dev_audio < 0){ --- linux-2.6.2-rc1/arch/um/drivers/hostaudio_user.c 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/drivers/hostaudio_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -4,9 +4,6 @@ */ #include -#include -#include -#include #include #include #include "hostaudio.h" @@ -20,45 +17,31 @@ ssize_t hostaudio_read_user(struct hostaudio_state *state, char *buffer, size_t count, loff_t *ppos) { - ssize_t ret; - #ifdef DEBUG printk("hostaudio: read_user called, count = %d\n", count); #endif - ret = read(state->fd, buffer, count); - - if(ret < 0) return(-errno); - return(ret); + return(os_read_file(state->fd, buffer, count)); } ssize_t hostaudio_write_user(struct hostaudio_state *state, const char *buffer, size_t count, loff_t *ppos) { - ssize_t ret; - #ifdef DEBUG printk("hostaudio: write_user called, count = %d\n", count); #endif - ret = write(state->fd, buffer, count); - - if(ret < 0) return(-errno); - return(ret); + return(os_write_file(state->fd, buffer, count)); } int hostaudio_ioctl_user(struct hostaudio_state *state, unsigned int cmd, unsigned long arg) { - int ret; #ifdef DEBUG printk("hostaudio: ioctl_user called, cmd = %u\n", cmd); #endif - ret = ioctl(state->fd, cmd, arg); - - if(ret < 0) return(-errno); - return(ret); + return(os_ioctl_generic(state->fd, cmd, arg)); } int hostaudio_open_user(struct hostaudio_state *state, int r, int w, char *dsp) @@ -67,14 +50,15 @@ int hostaudio_open_user(struct hostaudio printk("hostaudio: open_user called\n"); #endif - state->fd = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); - - if(state->fd >= 0) return(0); + state->fd = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); - printk("hostaudio_open_user failed to open '%s', errno = %d\n", - dsp, errno); + if(state->fd < 0) { + printk("hostaudio_open_user failed to open '%s', err = %d\n", + dsp, -state->fd); + return(state->fd); + } - return(-errno); + return(0); } int hostaudio_release_user(struct hostaudio_state *state) @@ -82,10 +66,10 @@ int hostaudio_release_user(struct hostau #ifdef DEBUG printk("hostaudio: release called\n"); #endif - if(state->fd >= 0){ - close(state->fd); - state->fd=-1; - } + if(state->fd >= 0){ + os_close_file(state->fd); + state->fd = -1; + } return(0); } @@ -95,15 +79,11 @@ int hostaudio_release_user(struct hostau int hostmixer_ioctl_mixdev_user(struct hostmixer_state *state, unsigned int cmd, unsigned long arg) { - int ret; #ifdef DEBUG printk("hostmixer: ioctl_user called cmd = %u\n",cmd); #endif - ret = ioctl(state->fd, cmd, arg); - if(ret < 0) - return(-errno); - return(ret); + return(os_ioctl_generic(state->fd, cmd, arg)); } int hostmixer_open_mixdev_user(struct hostmixer_state *state, int r, int w, @@ -115,12 +95,13 @@ int hostmixer_open_mixdev_user(struct ho state->fd = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0); - if(state->fd >= 0) return(0); - - printk("hostaudio_open_mixdev_user failed to open '%s', errno = %d\n", - mixer, errno); + if(state->fd < 0) { + printk("hostaudio_open_mixdev_user failed to open '%s', " + "err = %d\n", mixer, state->fd); + return(state->fd); + } - return(-errno); + return(0); } int hostmixer_release_mixdev_user(struct hostmixer_state *state) @@ -130,7 +111,7 @@ int hostmixer_release_mixdev_user(struct #endif if(state->fd >= 0){ - close(state->fd); + os_close_file(state->fd); state->fd = -1; } --- linux-2.6.2-rc1/arch/um/drivers/line.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/um/drivers/line.c 2004-01-21 23:30:29.000000000 -0800 @@ -6,8 +6,8 @@ #include "linux/sched.h" #include "linux/slab.h" #include "linux/list.h" +#include "linux/interrupt.h" #include "linux/devfs_fs_kernel.h" -#include "asm/irq.h" #include "asm/uaccess.h" #include "chan_kern.h" #include "irq_user.h" @@ -16,38 +16,55 @@ #include "user_util.h" #include "kern_util.h" #include "os.h" +#include "irq_kern.h" #define LINE_BUFSIZE 4096 -void line_interrupt(int irq, void *data, struct pt_regs *unused) +static irqreturn_t line_interrupt(int irq, void *data, struct pt_regs *unused) { struct line *dev = data; if(dev->count > 0) chan_interrupt(&dev->chan_list, &dev->task, dev->tty, irq, dev); + return IRQ_HANDLED; } -void line_timer_cb(void *arg) +static void line_timer_cb(void *arg) { struct line *dev = arg; line_interrupt(dev->driver->read_irq, dev, NULL); } -static void buffer_data(struct line *line, const char *buf, int len) +static int write_room(struct line *dev) { - int end; + int n; + + if(dev->buffer == NULL) return(LINE_BUFSIZE - 1); + + n = dev->head - dev->tail; + if(n <= 0) n = LINE_BUFSIZE + n; + return(n - 1); +} + +static int buffer_data(struct line *line, const char *buf, int len) +{ + int end, room; if(line->buffer == NULL){ line->buffer = kmalloc(LINE_BUFSIZE, GFP_ATOMIC); if(line->buffer == NULL){ printk("buffer_data - atomic allocation failed\n"); - return; + return(0); } line->head = line->buffer; line->tail = line->buffer; } + + room = write_room(line); + len = (len > room) ? room : len; + end = line->buffer + LINE_BUFSIZE - line->tail; if(len < end){ memcpy(line->tail, buf, len); @@ -60,6 +77,8 @@ static void buffer_data(struct line *lin memcpy(line->buffer, buf, len); line->tail = line->buffer + len; } + + return(len); } static int flush_buffer(struct line *line) @@ -95,7 +114,7 @@ int line_write(struct line *lines, struc struct line *line; char *new; unsigned long flags; - int n, err, i; + int n, err, i, ret = 0; if(tty->stopped) return 0; @@ -104,9 +123,13 @@ int line_write(struct line *lines, struc if(new == NULL) return(0); n = copy_from_user(new, buf, len); - if(n == len) - return(-EFAULT); buf = new; + if(n == len){ + len = -EFAULT; + goto out_free; + } + + len -= n; } i = tty->index; @@ -115,41 +138,50 @@ int line_write(struct line *lines, struc down(&line->sem); if(line->head != line->tail){ local_irq_save(flags); - buffer_data(line, buf, len); + ret += buffer_data(line, buf, len); err = flush_buffer(line); local_irq_restore(flags); if(err <= 0) - goto out; + goto out_up; } else { n = write_chan(&line->chan_list, buf, len, line->driver->write_irq); if(n < 0){ - len = n; - goto out; + ret = n; + goto out_up; } - if(n < len) - buffer_data(line, buf + n, len - n); + + len -= n; + ret += n; + if(len > 0) + ret += buffer_data(line, buf + n, len); } - out: + out_up: up(&line->sem); - return(len); + out_free: + if(from_user) + kfree(buf); + return(ret); } -void line_write_interrupt(int irq, void *data, struct pt_regs *unused) +static irqreturn_t line_write_interrupt(int irq, void *data, + struct pt_regs *unused) { struct line *dev = data; struct tty_struct *tty = dev->tty; int err; err = flush_buffer(dev); - if(err == 0) return; + if(err == 0) + return(IRQ_NONE); else if(err < 0){ dev->head = dev->buffer; dev->tail = dev->buffer; } - if(tty == NULL) return; + if(tty == NULL) + return(IRQ_NONE); if(test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && (tty->ldisc.write_wakeup != NULL)) @@ -161,21 +193,9 @@ void line_write_interrupt(int irq, void * writes. */ - if (waitqueue_active(&tty->write_wait)) + if(waitqueue_active(&tty->write_wait)) wake_up_interruptible(&tty->write_wait); - -} - -int line_write_room(struct tty_struct *tty) -{ - struct line *dev = tty->driver_data; - int n; - - if(dev->buffer == NULL) return(LINE_BUFSIZE - 1); - - n = dev->head - dev->tail; - if(n <= 0) n = LINE_BUFSIZE + n; - return(n - 1); + return(IRQ_HANDLED); } int line_setup_irq(int fd, int input, int output, void *data) @@ -305,7 +325,7 @@ int line_setup(struct line *lines, int n if(*end != '='){ printk(KERN_ERR "line_setup failed to parse \"%s\"\n", init); - return(1); + return(0); } init = end; } @@ -313,12 +333,12 @@ int line_setup(struct line *lines, int n if((n >= 0) && (n >= num)){ printk("line_setup - %d out of range ((0 ... %d) allowed)\n", n, num); - return(1); + return(0); } else if(n >= 0){ if(lines[n].count > 0){ printk("line_setup - device %d is open\n", n); - return(1); + return(0); } if(lines[n].init_pri <= INIT_ONE){ lines[n].init_pri = INIT_ONE; @@ -332,7 +352,7 @@ int line_setup(struct line *lines, int n else if(!all_allowed){ printk("line_setup - can't configure all devices from " "mconsole\n"); - return(1); + return(0); } else { for(i = 0; i < num; i++){ @@ -346,7 +366,7 @@ int line_setup(struct line *lines, int n } } } - return(0); + return(1); } int line_config(struct line *lines, int num, char *str) @@ -357,7 +377,7 @@ int line_config(struct line *lines, int printk("line_config - uml_strdup failed\n"); return(-ENOMEM); } - return(line_setup(lines, num, new, 0)); + return(!line_setup(lines, num, new, 0)); } int line_get_config(char *name, struct line *lines, int num, char *str, @@ -369,7 +389,7 @@ int line_get_config(char *name, struct l dev = simple_strtoul(name, &end, 0); if((*end != '\0') || (end == name)){ - *error_out = "line_setup failed to parse device number"; + *error_out = "line_get_config failed to parse device number"; return(0); } @@ -379,15 +399,15 @@ int line_get_config(char *name, struct l } line = &lines[dev]; + down(&line->sem); - if(!line->valid) CONFIG_CHUNK(str, size, n, "none", 1); else if(line->count == 0) CONFIG_CHUNK(str, size, n, line->init_str, 1); else n = chan_config_string(&line->chan_list, str, size, error_out); - up(&line->sem); + return(n); } @@ -396,7 +416,14 @@ int line_remove(struct line *lines, int char config[sizeof("conxxxx=none\0")]; sprintf(config, "%s=none", str); - return(line_setup(lines, num, config, 0)); + return(!line_setup(lines, num, config, 0)); +} + +int line_write_room(struct tty_struct *tty) +{ + struct line *dev = tty->driver_data; + + return(write_room(dev)); } struct tty_driver *line_register_devfs(struct lines *set, @@ -412,7 +439,8 @@ struct tty_driver *line_register_devfs(s return NULL; driver->driver_name = line_driver->name; - driver->name = line_driver->devfs_name; + driver->name = line_driver->device_name; + driver->devfs_name = line_driver->devfs_name; driver->major = line_driver->major; driver->minor_start = line_driver->minor_start; driver->type = line_driver->type; @@ -432,7 +460,7 @@ struct tty_driver *line_register_devfs(s for(i = 0; i < nlines; i++){ if(!lines[i].valid) - tty_unregister_devfs(driver, i); + tty_unregister_device(driver, i); } mconsole_register_dev(&line_driver->mc); @@ -465,24 +493,25 @@ struct winch { struct line *line; }; -void winch_interrupt(int irq, void *data, struct pt_regs *unused) +irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused) { struct winch *winch = data; struct tty_struct *tty; int err; char c; - err = generic_read(winch->fd, &c, NULL); - if(err < 0){ - if(err != -EAGAIN){ - printk("winch_interrupt : read failed, errno = %d\n", - -err); - printk("fd %d is losing SIGWINCH support\n", - winch->tty_fd); - free_irq(irq, data); - return; + if(winch->fd != -1){ + err = generic_read(winch->fd, &c, NULL); + if(err < 0){ + if(err != -EAGAIN){ + printk("winch_interrupt : read failed, " + "errno = %d\n", -err); + printk("fd %d is losing SIGWINCH support\n", + winch->tty_fd); + return(IRQ_HANDLED); + } + goto out; } - goto out; } tty = winch->line->tty; if(tty != NULL){ @@ -492,7 +521,9 @@ void winch_interrupt(int irq, void *data kill_pg(tty->pgrp, SIGWINCH, 1); } out: - reactivate_fd(winch->fd, WINCH_IRQ); + if(winch->fd != -1) + reactivate_fd(winch->fd, WINCH_IRQ); + return(IRQ_HANDLED); } DECLARE_MUTEX(winch_handler_sem); @@ -529,7 +560,10 @@ static void winch_cleanup(void) list_for_each(ele, &winch_handlers){ winch = list_entry(ele, struct winch, list); - close(winch->fd); + if(winch->fd != -1){ + deactivate_fd(winch->fd, WINCH_IRQ); + os_close_file(winch->fd); + } if(winch->pid != -1) os_kill_process(winch->pid, 1); } --- linux-2.6.2-rc1/arch/um/drivers/Makefile 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/um/drivers/Makefile 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,5 @@ # -# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) +# Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com) # Licensed under the GPL # @@ -39,6 +39,8 @@ obj-$(CONFIG_PTY_CHAN) += pty.o obj-$(CONFIG_TTY_CHAN) += tty.o obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o obj-$(CONFIG_UML_WATCHDOG) += harddog.o +obj-$(CONFIG_BLK_DEV_COW) += cow_kern.o +obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o obj-y += stdio_console.o $(CHAN_OBJS) @@ -46,7 +48,7 @@ USER_SINGLE_OBJS = $(foreach f,$(patsubs USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) fd.o \ null.o pty.o tty.o xterm.o -USER_OBJS := $(foreach file,$(USER_OBJS),arch/um/drivers/$(file)) +USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) $(USER_OBJS) : %.o: %.c $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< --- linux-2.6.2-rc1/arch/um/drivers/mcast_user.c 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/um/drivers/mcast_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -23,6 +23,7 @@ #include "kern_util.h" #include "user_util.h" #include "user.h" +#include "os.h" #define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) @@ -62,7 +63,8 @@ static int mcast_open(void *data) goto out; } - if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0){ + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0){ printk("mcast_open : data socket failed, errno = %d\n", errno); fd = -ENOMEM; @@ -72,7 +74,7 @@ static int mcast_open(void *data) if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { printk("mcast_open: SO_REUSEADDR failed, errno = %d\n", errno); - close(fd); + os_close_file(fd); fd = -EINVAL; goto out; } @@ -82,7 +84,7 @@ static int mcast_open(void *data) sizeof(pri->ttl)) < 0) { printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n", errno); - close(fd); + os_close_file(fd); fd = -EINVAL; goto out; } @@ -91,7 +93,7 @@ static int mcast_open(void *data) if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) { printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n", errno); - close(fd); + os_close_file(fd); fd = -EINVAL; goto out; } @@ -99,7 +101,7 @@ static int mcast_open(void *data) /* bind socket to mcast address */ if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) { printk("mcast_open : data bind failed, errno = %d\n", errno); - close(fd); + os_close_file(fd); fd = -EINVAL; goto out; } @@ -115,7 +117,7 @@ static int mcast_open(void *data) "interface on the host.\n"); printk("eth0 should be configured in order to use the " "multicast transport.\n"); - close(fd); + os_close_file(fd); fd = -EINVAL; } @@ -137,7 +139,7 @@ static void mcast_close(int fd, void *da errno); } - close(fd); + os_close_file(fd); } int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri) --- linux-2.6.2-rc1/arch/um/drivers/mconsole_kern.c 2003-06-14 12:17:56.000000000 -0700 +++ 25/arch/um/drivers/mconsole_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,6 +1,6 @@ /* * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -15,6 +15,9 @@ #include "linux/sysrq.h" #include "linux/workqueue.h" #include "linux/module.h" +#include "linux/file.h" +#include "linux/fs.h" +#include "linux/namei.h" #include "linux/proc_fs.h" #include "asm/irq.h" #include "asm/uaccess.h" @@ -27,6 +30,7 @@ #include "init.h" #include "os.h" #include "umid.h" +#include "irq_kern.h" static int do_unlink_socket(struct notifier_block *notifier, unsigned long what, void *data) @@ -67,7 +71,7 @@ void mc_work_proc(void *unused) DECLARE_WORK(mconsole_work, mc_work_proc, NULL); -void mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs) { int fd; struct mconsole_entry *new; @@ -75,9 +79,10 @@ void mconsole_interrupt(int irq, void *d fd = (int) dev_id; while (mconsole_get_request(fd, &req)){ - if(req.cmd->as_interrupt) (*req.cmd->handler)(&req); + if(req.cmd->context == MCONSOLE_INTR) + (*req.cmd->handler)(&req); else { - new = kmalloc(sizeof(req), GFP_ATOMIC); + new = kmalloc(sizeof(*new), GFP_ATOMIC); if(new == NULL) mconsole_reply(&req, "Out of memory", 1, 0); else { @@ -88,6 +93,7 @@ void mconsole_interrupt(int irq, void *d } if(!list_empty(&mc_requests)) schedule_work(&mconsole_work); reactivate_fd(fd, MCONSOLE_IRQ); + return(IRQ_HANDLED); } void mconsole_version(struct mc_request *req) @@ -100,20 +106,110 @@ void mconsole_version(struct mc_request mconsole_reply(req, version, 0, 0); } +void mconsole_log(struct mc_request *req) +{ + int len; + char *ptr = req->request.data; + + ptr += strlen("log"); + while(isspace(*ptr)) ptr++; + + len = req->len - (ptr - req->request.data); + printk("%.*s", len, ptr); + mconsole_reply(req, "", 0, 0); +} + +void mconsole_proc(struct mc_request *req) +{ + struct nameidata nd; + struct file_system_type *proc; + struct super_block *super; + struct file *file; + int n, err; + char *ptr = req->request.data, *buf; + + ptr += strlen("proc"); + while(isspace(*ptr)) ptr++; + + proc = get_fs_type("proc"); + if(proc == NULL){ + mconsole_reply(req, "procfs not registered", 1, 0); + goto out; + } + + super = (*proc->get_sb)(proc, 0, NULL, NULL); + put_filesystem(proc); + if(super == NULL){ + mconsole_reply(req, "Failed to get procfs superblock", 1, 0); + goto out; + } + up_write(&super->s_umount); + + nd.dentry = super->s_root; + nd.mnt = NULL; + nd.flags = O_RDONLY + 1; + nd.last_type = LAST_ROOT; + + err = link_path_walk(ptr, &nd); + if(err){ + mconsole_reply(req, "Failed to look up file", 1, 0); + goto out_kill; + } + + file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + if(IS_ERR(file)){ + mconsole_reply(req, "Failed to open file", 1, 0); + goto out_kill; + } + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if(buf == NULL){ + mconsole_reply(req, "Failed to allocate buffer", 1, 0); + goto out_fput; + } + + if((file->f_op != NULL) && (file->f_op->read != NULL)){ + do { + n = (*file->f_op->read)(file, buf, PAGE_SIZE - 1, + &file->f_pos); + if(n >= 0){ + buf[n] = '\0'; + mconsole_reply(req, buf, 0, (n > 0)); + } + else { + mconsole_reply(req, "Read of file failed", + 1, 0); + goto out_free; + } + } while(n > 0); + } + else mconsole_reply(req, "", 0, 0); + + out_free: + kfree(buf); + out_fput: + fput(file); + out_kill: + deactivate_super(super); + out: ; +} + #define UML_MCONSOLE_HELPTEXT \ -"Commands: - version - Get kernel version - help - Print this message - halt - Halt UML - reboot - Reboot UML - config = - Add a new device to UML; - same syntax as command line - config - Query the configuration of a device - remove - Remove a device from UML - sysrq - Performs the SysRq action controlled by the letter - cad - invoke the Ctl-Alt-Del handler - stop - pause the UML; it will do nothing until it receives a 'go' - go - continue the UML after a 'stop' +"Commands: \n\ + version - Get kernel version \n\ + help - Print this message \n\ + halt - Halt UML \n\ + reboot - Reboot UML \n\ + config = - Add a new device to UML; \n\ + same syntax as command line \n\ + config - Query the configuration of a device \n\ + remove - Remove a device from UML \n\ + sysrq - Performs the SysRq action controlled by the letter \n\ + cad - invoke the Ctl-Alt-Del handler \n\ + stop - pause the UML; it will do nothing until it receives a 'go' \n\ + go - continue the UML after a 'stop' \n\ + log - make UML enter into the kernel log\n\ + proc - returns the contents of the UML's /proc/\n\ " void mconsole_help(struct mc_request *req) @@ -302,7 +398,7 @@ int mconsole_init(void) if(umid_file_name("mconsole", file, sizeof(file))) return(-1); snprintf(mconsole_socket_name, sizeof(file), "%s", file); - sock = create_unix_socket(file, sizeof(file)); + sock = os_create_unix_socket(file, sizeof(file), 1); if (sock < 0){ printk("Failed to initialize management console\n"); return(1); @@ -344,11 +440,16 @@ static int write_proc_mconsole(struct fi if(buf == NULL) return(-ENOMEM); - if(copy_from_user(buf, buffer, count)) - return(-EFAULT); + if(copy_from_user(buf, buffer, count)){ + count = -EFAULT; + goto out; + } + buf[count] = '\0'; mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count); + out: + kfree(buf); return(count); } --- linux-2.6.2-rc1/arch/um/drivers/mconsole_user.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/drivers/mconsole_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,6 +1,6 @@ /* * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -18,16 +18,18 @@ #include "umid.h" static struct mconsole_command commands[] = { - { "version", mconsole_version, 1 }, - { "halt", mconsole_halt, 0 }, - { "reboot", mconsole_reboot, 0 }, - { "config", mconsole_config, 0 }, - { "remove", mconsole_remove, 0 }, - { "sysrq", mconsole_sysrq, 1 }, - { "help", mconsole_help, 1 }, - { "cad", mconsole_cad, 1 }, - { "stop", mconsole_stop, 0 }, - { "go", mconsole_go, 1 }, + { "version", mconsole_version, MCONSOLE_INTR }, + { "halt", mconsole_halt, MCONSOLE_PROC }, + { "reboot", mconsole_reboot, MCONSOLE_PROC }, + { "config", mconsole_config, MCONSOLE_PROC }, + { "remove", mconsole_remove, MCONSOLE_PROC }, + { "sysrq", mconsole_sysrq, MCONSOLE_INTR }, + { "help", mconsole_help, MCONSOLE_INTR }, + { "cad", mconsole_cad, MCONSOLE_INTR }, + { "stop", mconsole_stop, MCONSOLE_PROC }, + { "go", mconsole_go, MCONSOLE_INTR }, + { "log", mconsole_log, MCONSOLE_INTR }, + { "proc", mconsole_proc, MCONSOLE_PROC }, }; /* Initialized in mconsole_init, which is an initcall */ @@ -139,6 +141,7 @@ int mconsole_reply(struct mc_request *re memcpy(reply.data, str, len); reply.data[len] = '\0'; total -= len; + str += len; reply.len = len + 1; len = sizeof(reply) + reply.len - sizeof(reply.data); --- linux-2.6.2-rc1/arch/um/drivers/mmapper_kern.c 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/um/drivers/mmapper_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -120,7 +120,10 @@ static int __init mmapper_init(void) printk(KERN_INFO "Mapper v0.1\n"); v_buf = (char *) find_iomem("mmapper", &mmapper_size); - if(mmapper_size == 0) return(0); + if(mmapper_size == 0){ + printk(KERN_ERR "mmapper_init - find_iomem failed\n"); + return(0); + } p_buf = __pa(v_buf); --- linux-2.6.2-rc1/arch/um/drivers/net_kern.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/um/drivers/net_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -26,6 +26,7 @@ #include "mconsole_kern.h" #include "init.h" #include "irq_user.h" +#include "irq_kern.h" static spinlock_t opened_lock = SPIN_LOCK_UNLOCKED; LIST_HEAD(opened); @@ -37,7 +38,8 @@ static int uml_net_rx(struct net_device struct sk_buff *skb; /* If we can't allocate memory, try again next round. */ - if ((skb = dev_alloc_skb(dev->mtu)) == NULL) { + skb = dev_alloc_skb(dev->mtu); + if (skb == NULL) { lp->stats.rx_dropped++; return 0; } @@ -61,14 +63,14 @@ static int uml_net_rx(struct net_device return pkt_len; } -void uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) { struct net_device *dev = dev_id; struct uml_net_private *lp = dev->priv; int err; if(!netif_running(dev)) - return; + return(IRQ_NONE); spin_lock(&lp->lock); while((err = uml_net_rx(dev)) > 0) ; @@ -83,6 +85,7 @@ void uml_net_interrupt(int irq, void *de out: spin_unlock(&lp->lock); + return(IRQ_HANDLED); } static int uml_net_open(struct net_device *dev) @@ -252,37 +255,6 @@ void uml_net_user_timer_expire(unsigned #endif } -/* - * default do nothing hard header packet routines for struct net_device init. - * real ethernet transports will overwrite with real routines. - */ -static int uml_net_hard_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, void *saddr, unsigned len) -{ - return(0); /* no change */ -} - -static int uml_net_rebuild_header(struct sk_buff *skb) -{ - return(0); /* ignore */ -} - -static int uml_net_header_cache(struct neighbour *neigh, struct hh_cache *hh) -{ - return(-1); /* fail */ -} - -static void uml_net_header_cache_update(struct hh_cache *hh, - struct net_device *dev, unsigned char * haddr) -{ - /* ignore */ -} - -static int uml_net_header_parse(struct sk_buff *skb, unsigned char *haddr) -{ - return(0); /* nothing */ -} - static spinlock_t devices_lock = SPIN_LOCK_UNLOCKED; static struct list_head devices = LIST_HEAD_INIT(devices); @@ -292,7 +264,7 @@ static int eth_configure(int n, void *in struct uml_net *device; struct net_device *dev; struct uml_net_private *lp; - int err, size; + int save, err, size; size = transport->private_size + sizeof(struct uml_net_private) + sizeof(((struct uml_net_private *) 0)->user); @@ -334,12 +306,6 @@ static int eth_configure(int n, void *in snprintf(dev->name, sizeof(dev->name), "eth%d", n); device->dev = dev; - dev->hard_header = uml_net_hard_header; - dev->rebuild_header = uml_net_rebuild_header; - dev->hard_header_cache = uml_net_header_cache; - dev->header_cache_update= uml_net_header_cache_update; - dev->hard_header_parse = uml_net_header_parse; - (*transport->kern->init)(dev, init); dev->mtu = transport->user->max_packet; @@ -358,25 +324,37 @@ static int eth_configure(int n, void *in rtnl_lock(); err = register_netdevice(dev); rtnl_unlock(); - if (err) + if (err) { + device->dev = NULL; + /* XXX: should we call ->remove() here? */ + free_netdev(dev); return 1; + } lp = dev->priv; - INIT_LIST_HEAD(&lp->list); - spin_lock_init(&lp->lock); - lp->dev = dev; - lp->fd = -1; - lp->mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0 }; - lp->have_mac = device->have_mac; - lp->protocol = transport->kern->protocol; - lp->open = transport->user->open; - lp->close = transport->user->close; - lp->remove = transport->user->remove; - lp->read = transport->kern->read; - lp->write = transport->kern->write; - lp->add_address = transport->user->add_address; - lp->delete_address = transport->user->delete_address; - lp->set_mtu = transport->user->set_mtu; + /* lp.user is the first four bytes of the transport data, which + * has already been initialized. This structure assignment will + * overwrite that, so we make sure that .user gets overwritten with + * what it already has. + */ + save = lp->user[0]; + *lp = ((struct uml_net_private) + { .list = LIST_HEAD_INIT(lp->list), + .lock = SPIN_LOCK_UNLOCKED, + .dev = dev, + .fd = -1, + .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0}, + .have_mac = device->have_mac, + .protocol = transport->kern->protocol, + .open = transport->user->open, + .close = transport->user->close, + .remove = transport->user->remove, + .read = transport->kern->read, + .write = transport->kern->write, + .add_address = transport->user->add_address, + .delete_address = transport->user->delete_address, + .set_mtu = transport->user->set_mtu, + .user = { save } }); init_timer(&lp->tl); lp->tl.function = uml_net_user_timer_expire; @@ -609,7 +587,8 @@ static int net_remove(char *str) unregister_netdev(dev); list_del(&device->list); - free_netdev(device); + kfree(device); + free_netdev(dev); return(0); } --- linux-2.6.2-rc1/arch/um/drivers/net_user.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/drivers/net_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -26,8 +26,7 @@ int tap_open_common(void *dev, char *gat if(gate_addr == NULL) return(0); if(sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4){ - printk("Invalid tap IP address - '%s'\n", - gate_addr); + printk("Invalid tap IP address - '%s'\n", gate_addr); return(-EINVAL); } return(0); @@ -60,18 +59,18 @@ void read_output(int fd, char *output, i } *output = '\0'; - if(read(fd, &remain, sizeof(remain)) != sizeof(remain)){ - printk("read_output - read of length failed, errno = %d\n", - errno); + n = os_read_file(fd, &remain, sizeof(remain)); + if(n != sizeof(remain)){ + printk("read_output - read of length failed, err = %d\n", -n); return; } while(remain != 0){ n = (remain < len) ? remain : len; - actual = read(fd, output, n); + actual = os_read_file(fd, output, n); if(actual != n){ printk("read_output - read of data failed, " - "errno = %d\n", errno); + "err = %d\n", -actual); return; } remain -= actual; @@ -83,13 +82,12 @@ int net_read(int fd, void *buf, int len) { int n; - while(((n = read(fd, buf, len)) < 0) && (errno == EINTR)) ; + n = os_read_file(fd, buf, len); - if(n < 0){ - if(errno == EAGAIN) return(0); - return(-errno); - } - else if(n == 0) return(-ENOTCONN); + if(n == -EAGAIN) + return(0); + else if(n == 0) + return(-ENOTCONN); return(n); } @@ -112,13 +110,13 @@ int net_write(int fd, void *buf, int len { int n; - while(((n = write(fd, buf, len)) < 0) && (errno == EINTR)) ; - if(n < 0){ - if(errno == EAGAIN) return(0); - return(-errno); - } - else if(n == 0) return(-ENOTCONN); - return(n); + n = os_write_file(fd, buf, len); + + if(n == -EAGAIN) + return(0); + else if(n == 0) + return(-ENOTCONN); + return(n); } int net_send(int fd, void *buf, int len) @@ -157,7 +155,7 @@ static void change_pre_exec(void *arg) { struct change_pre_exec_data *data = arg; - close(data->close_me); + os_close_file(data->close_me); dup2(data->stdout, 1); } @@ -167,15 +165,15 @@ static int change_tramp(char **argv, cha struct change_pre_exec_data pe_data; err = os_pipe(fds, 1, 0); - if(err){ - printk("change_tramp - pipe failed, errno = %d\n", -err); + if(err < 0){ + printk("change_tramp - pipe failed, err = %d\n", -err); return(err); } pe_data.close_me = fds[0]; pe_data.stdout = fds[1]; pid = run_helper(change_pre_exec, &pe_data, argv, NULL); - close(fds[1]); + os_close_file(fds[1]); read_output(fds[0], output, output_len); waitpid(pid, NULL, 0); return(pid); --- linux-2.6.2-rc1/arch/um/drivers/null.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/drivers/null.c 2004-01-21 23:30:29.000000000 -0800 @@ -5,7 +5,6 @@ #include #include -#include #include "chan_user.h" #include "os.h" --- linux-2.6.2-rc1/arch/um/drivers/port_kern.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/drivers/port_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -6,6 +6,7 @@ #include "linux/list.h" #include "linux/sched.h" #include "linux/slab.h" +#include "linux/interrupt.h" #include "linux/irq.h" #include "linux/spinlock.h" #include "linux/errno.h" @@ -14,6 +15,7 @@ #include "kern_util.h" #include "kern.h" #include "irq_user.h" +#include "irq_kern.h" #include "port.h" #include "init.h" #include "os.h" @@ -38,21 +40,21 @@ struct port_dev { struct connection { struct list_head list; int fd; - int helper_pid; + int helper_pid; int socket[2]; int telnetd_pid; struct port_list *port; }; -static void pipe_interrupt(int irq, void *data, struct pt_regs *regs) +static irqreturn_t pipe_interrupt(int irq, void *data, struct pt_regs *regs) { struct connection *conn = data; int fd; - fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); + fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); if(fd < 0){ if(fd == -EAGAIN) - return; + return(IRQ_NONE); printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", -fd); @@ -65,6 +67,7 @@ static void pipe_interrupt(int irq, void list_add(&conn->list, &conn->port->connections); up(&conn->port->sem); + return(IRQ_HANDLED); } static int port_accept(struct port_list *port) @@ -102,8 +105,7 @@ static int port_accept(struct port_list } list_add(&conn->list, &port->pending); - ret = 1; - goto out; + return(1); out_free: kfree(conn); @@ -138,12 +140,13 @@ void port_work_proc(void *unused) DECLARE_WORK(port_work, port_work_proc, NULL); -static void port_interrupt(int irq, void *data, struct pt_regs *regs) +static irqreturn_t port_interrupt(int irq, void *data, struct pt_regs *regs) { struct port_list *port = data; port->has_connection = 1; schedule_work(&port_work); + return(IRQ_HANDLED); } void *port_data(int port_num) --- linux-2.6.2-rc1/arch/um/drivers/port_user.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/um/drivers/port_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -47,10 +47,12 @@ void *port_init(char *str, int device, s return(NULL); } - if((kern_data = port_data(port)) == NULL) + kern_data = port_data(port); + if(kern_data == NULL) return(NULL); - if((data = um_kmalloc(sizeof(*data))) == NULL) + data = um_kmalloc(sizeof(*data)); + if(data == NULL) goto err; *data = ((struct port_chan) { .raw = opts->raw, @@ -90,7 +92,7 @@ void port_close(int fd, void *d) struct port_chan *data = d; port_remove_dev(data->kernel_data); - close(fd); + os_close_file(fd); } int port_console_write(int fd, const char *buf, int n, void *d) @@ -130,11 +132,15 @@ int port_listen_fd(int port) goto out; } - if((listen(fd, 1) < 0) || (os_set_fd_block(fd, 0))){ + if(listen(fd, 1) < 0){ err = -errno; goto out; } + err = os_set_fd_block(fd, 0); + if(err < 0) + goto out; + return(fd); out: os_close_file(fd); @@ -153,10 +159,10 @@ void port_pre_exec(void *arg) dup2(data->sock_fd, 0); dup2(data->sock_fd, 1); dup2(data->sock_fd, 2); - close(data->sock_fd); + os_close_file(data->sock_fd); dup2(data->pipe_fd, 3); os_shutdown_socket(3, 1, 0); - close(data->pipe_fd); + os_close_file(data->pipe_fd); } int port_connection(int fd, int *socket, int *pid_out) @@ -166,11 +172,12 @@ int port_connection(int fd, int *socket, "/usr/lib/uml/port-helper", NULL }; struct port_pre_exec_data data; - if((new = os_accept_connection(fd)) < 0) - return(-errno); + new = os_accept_connection(fd); + if(new < 0) + return(new); err = os_pipe(socket, 0, 0); - if(err) + if(err < 0) goto out_close; data = ((struct port_pre_exec_data) @@ -186,11 +193,11 @@ int port_connection(int fd, int *socket, out_shutdown: os_shutdown_socket(socket[0], 1, 1); - close(socket[0]); + os_close_file(socket[0]); os_shutdown_socket(socket[1], 1, 1); - close(socket[1]); + os_close_file(socket[1]); out_close: - close(new); + os_close_file(new); return(err); } --- linux-2.6.2-rc1/arch/um/drivers/pty.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/um/drivers/pty.c 2004-01-21 23:30:29.000000000 -0800 @@ -7,12 +7,12 @@ #include #include #include -#include #include #include "chan_user.h" #include "user.h" #include "user_util.h" #include "kern_util.h" +#include "os.h" struct pty_chan { void (*announce)(char *dev_name, int dev); @@ -26,7 +26,8 @@ void *pty_chan_init(char *str, int devic { struct pty_chan *data; - if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); + data = um_kmalloc(sizeof(*data)); + if(data == NULL) return(NULL); *data = ((struct pty_chan) { .announce = opts->announce, .dev = device, .raw = opts->raw }); @@ -39,7 +40,8 @@ int pts_open(int input, int output, int char *dev; int fd; - if((fd = get_pty()) < 0){ + fd = get_pty(); + if(fd < 0){ printk("open_pts : Failed to open pts\n"); return(-errno); } @@ -57,29 +59,27 @@ int pts_open(int input, int output, int int getmaster(char *line) { - struct stat stb; char *pty, *bank, *cp; - int master; + int master, err; pty = &line[strlen("/dev/ptyp")]; for (bank = "pqrs"; *bank; bank++) { line[strlen("/dev/pty")] = *bank; *pty = '0'; - if (stat(line, &stb) < 0) + if (os_stat_file(line, NULL) < 0) break; for (cp = "0123456789abcdef"; *cp; cp++) { *pty = *cp; - master = open(line, O_RDWR); + master = os_open_file(line, of_rdwr(OPENFLAGS()), 0); if (master >= 0) { char *tp = &line[strlen("/dev/")]; - int ok; /* verify slave side is usable */ *tp = 't'; - ok = access(line, R_OK|W_OK) == 0; + err = os_access(line, OS_ACC_RW_OK); *tp = 'p'; - if (ok) return(master); - (void) close(master); + if(err == 0) return(master); + (void) os_close_file(master); } } } --- linux-2.6.2-rc1/arch/um/drivers/slip_user.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/um/drivers/slip_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -4,11 +4,9 @@ #include #include #include -#include #include #include #include -#include #include #include "user_util.h" #include "kern_util.h" @@ -65,9 +63,9 @@ static void slip_pre_exec(void *arg) { struct slip_pre_exec_data *data = arg; - if(data->stdin != -1) dup2(data->stdin, 0); + if(data->stdin >= 0) dup2(data->stdin, 0); dup2(data->stdout, 1); - if(data->close_me != -1) close(data->close_me); + if(data->close_me >= 0) os_close_file(data->close_me); } static int slip_tramp(char **argv, int fd) @@ -77,8 +75,8 @@ static int slip_tramp(char **argv, int f int status, pid, fds[2], err, output_len; err = os_pipe(fds, 1, 0); - if(err){ - printk("slip_tramp : pipe failed, errno = %d\n", -err); + if(err < 0){ + printk("slip_tramp : pipe failed, err = %d\n", -err); return(err); } @@ -96,7 +94,7 @@ static int slip_tramp(char **argv, int f printk("slip_tramp : failed to allocate output " "buffer\n"); - close(fds[1]); + os_close_file(fds[1]); read_output(fds[0], output, output_len); if(output != NULL){ printk("%s", output); @@ -105,7 +103,7 @@ static int slip_tramp(char **argv, int f if(waitpid(pid, &status, 0) < 0) err = errno; else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){ printk("'%s' didn't exit with status 0\n", argv[0]); - err = EINVAL; + err = -EINVAL; } } return(err); @@ -118,15 +116,17 @@ static int slip_open(void *data) char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, NULL }; - int sfd, mfd, disc, sencap, err; + int sfd, mfd, err; - if((mfd = get_pty()) < 0){ - printk("umn : Failed to open pty\n"); - return(-1); + mfd = get_pty(); + if(mfd < 0){ + printk("umn : Failed to open pty, err = %d\n", -mfd); + return(mfd); } - if((sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0)) < 0){ - printk("Couldn't open tty for slip line\n"); - return(-1); + sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0); + if(sfd < 0){ + printk("Couldn't open tty for slip line, err = %d\n", -sfd); + return(sfd); } if(set_up_tty(sfd)) return(-1); pri->slave = sfd; @@ -138,28 +138,23 @@ static int slip_open(void *data) err = slip_tramp(argv, sfd); - if(err != 0){ - printk("slip_tramp failed - errno = %d\n", err); - return(-err); + if(err < 0){ + printk("slip_tramp failed - err = %d\n", -err); + return(err); } - if(ioctl(pri->slave, SIOCGIFNAME, pri->name) < 0){ - printk("SIOCGIFNAME failed, errno = %d\n", errno); - return(-errno); + err = os_get_ifname(pri->slave, pri->name); + if(err < 0){ + printk("get_ifname failed, err = %d\n", -err); + return(err); } iter_addresses(pri->dev, open_addr, pri->name); } else { - disc = N_SLIP; - if(ioctl(sfd, TIOCSETD, &disc) < 0){ - printk("Failed to set slip line discipline - " - "errno = %d\n", errno); - return(-errno); - } - sencap = 0; - if(ioctl(sfd, SIOCSIFENCAP, &sencap) < 0){ - printk("Failed to set slip encapsulation - " - "errno = %d\n", errno); - return(-errno); + err = os_set_slip(sfd); + if(err < 0){ + printk("Failed to set slip discipline encapsulation - " + "err = %d\n", -err); + return(err); } } return(mfd); @@ -181,9 +176,9 @@ static void slip_close(int fd, void *dat err = slip_tramp(argv, -1); if(err != 0) - printk("slip_tramp failed - errno = %d\n", err); - close(fd); - close(pri->slave); + printk("slip_tramp failed - errno = %d\n", -err); + os_close_file(fd); + os_close_file(pri->slave); pri->slave = -1; } @@ -243,7 +238,7 @@ static void slip_add_addr(unsigned char { struct slip_data *pri = data; - if(pri->slave == -1) return; + if(pri->slave < 0) return; open_addr(addr, netmask, pri->name); } @@ -252,7 +247,7 @@ static void slip_del_addr(unsigned char { struct slip_data *pri = data; - if(pri->slave == -1) return; + if(pri->slave < 0) return; close_addr(addr, netmask, pri->name); } --- linux-2.6.2-rc1/arch/um/drivers/slirp_user.c 2003-06-14 12:18:25.000000000 -0700 +++ 25/arch/um/drivers/slirp_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -48,15 +47,15 @@ static int slirp_tramp(char **argv, int return(pid); } - + +/* XXX This is just a trivial wrapper around os_pipe */ static int slirp_datachan(int *mfd, int *sfd) { int fds[2], err; err = os_pipe(fds, 1, 1); - if(err){ - printk("slirp_datachan: Failed to open pipe, errno = %d\n", - -err); + if(err < 0){ + printk("slirp_datachan: Failed to open pipe, err = %d\n", -err); return(err); } @@ -77,7 +76,7 @@ static int slirp_open(void *data) pid = slirp_tramp(pri->argw.argv, sfd); if(pid < 0){ - printk("slirp_tramp failed - errno = %d\n", pid); + printk("slirp_tramp failed - errno = %d\n", -pid); os_close_file(sfd); os_close_file(mfd); return(pid); @@ -97,8 +96,8 @@ static void slirp_close(int fd, void *da struct slirp_data *pri = data; int status,err; - close(fd); - close(pri->slave); + os_close_file(fd); + os_close_file(pri->slave); pri->slave = -1; --- linux-2.6.2-rc1/arch/um/drivers/ssl.c 2003-06-14 12:18:21.000000000 -0700 +++ 25/arch/um/drivers/ssl.c 2004-01-21 23:30:29.000000000 -0800 @@ -10,6 +10,7 @@ #include "linux/major.h" #include "linux/mm.h" #include "linux/init.h" +#include "linux/console.h" #include "asm/termbits.h" #include "asm/irq.h" #include "line.h" @@ -53,8 +54,9 @@ static int ssl_remove(char *str); static struct line_driver driver = { .name = "UML serial line", - .devfs_name = "tts/%d", - .major = TTYAUX_MAJOR, + .device_name = "ttS", + .devfs_name = "tts/", + .major = TTY_MAJOR, .minor_start = 64, .type = TTY_DRIVER_TYPE_SERIAL, .subtype = 0, @@ -149,6 +151,9 @@ static int ssl_ioctl(struct tty_struct * case TCSETSW: case TCGETA: case TIOCMGET: + case TCSBRK: + case TCSBRKP: + case TIOCMSET: ret = -ENOIOCTLCMD; break; default: @@ -212,6 +217,39 @@ static struct tty_operations ssl_ops = { */ static int ssl_init_done = 0; +extern int tty_init(void); + +static void ssl_console_write(struct console *c, const char *string, + unsigned len) +{ + struct line *line = &serial_lines[c->index]; + if(ssl_init_done) + down(&line->sem); + console_write_chan(&line->chan_list, string, len); + if(ssl_init_done) + up(&line->sem); +} + +static struct tty_driver *ssl_console_device(struct console *c, int *index) +{ + *index = c->index; + return ssl_driver; +} + +static int ssl_console_setup(struct console *co, char *options) +{ + return(0); +} + +static struct console ssl_cons = { + name: "ttyS", + write: ssl_console_write, + device: ssl_console_device, + setup: ssl_console_setup, + flags: CON_PRINTBUFFER, + index: -1, +}; + int ssl_init(void) { char *new_title; @@ -219,6 +257,8 @@ int ssl_init(void) printk(KERN_INFO "Initializing software serial port version %d\n", ssl_version); + tty_init(); + ssl_driver = line_register_devfs(&lines, &driver, &ssl_ops, serial_lines, sizeof(serial_lines)/sizeof(serial_lines[0])); @@ -227,6 +267,7 @@ int ssl_init(void) new_title = add_xterm_umid(opts.xterm_title); if(new_title != NULL) opts.xterm_title = new_title; + register_console(&ssl_cons); ssl_init_done = 1; return(0); } @@ -235,9 +276,9 @@ __initcall(ssl_init); static int ssl_chan_setup(char *str) { - line_setup(serial_lines, sizeof(serial_lines)/sizeof(serial_lines[0]), - str, 1); - return(1); + return(line_setup(serial_lines, + sizeof(serial_lines)/sizeof(serial_lines[0]), + str, 1)); } __setup("ssl", ssl_chan_setup); --- linux-2.6.2-rc1/arch/um/drivers/stdio_console.c 2003-06-14 12:18:04.000000000 -0700 +++ 25/arch/um/drivers/stdio_console.c 2004-01-21 23:30:29.000000000 -0800 @@ -83,7 +83,8 @@ static int con_remove(char *str); static struct line_driver driver = { .name = "UML console", - .devfs_name = "vc/%d", + .device_name = "tty", + .devfs_name = "vc/", .major = TTY_MAJOR, .minor_start = 0, .type = TTY_DRIVER_TYPE_CONSOLE, @@ -159,14 +160,28 @@ static int chars_in_buffer(struct tty_st static int con_init_done = 0; +static struct tty_operations console_ops = { + .open = con_open, + .close = con_close, + .write = con_write, + .chars_in_buffer = chars_in_buffer, + .set_termios = set_termios, + .write_room = line_write_room, +}; + +extern int tty_init(void); + int stdio_init(void) { char *new_title; printk(KERN_INFO "Initializing stdio console driver\n"); + tty_init(); + console_driver = line_register_devfs(&console_lines, &driver, - &console_ops, vts, sizeof(vts)/sizeof(vts[0])); + &console_ops, vts, + sizeof(vts)/sizeof(vts[0])); lines_init(vts, sizeof(vts)/sizeof(vts[0])); @@ -183,19 +198,14 @@ __initcall(stdio_init); static void console_write(struct console *console, const char *string, unsigned len) { - if(con_init_done) down(&vts[console->index].sem); - console_write_chan(&vts[console->index].chan_list, string, len); - if(con_init_done) up(&vts[console->index].sem); -} + struct line *line = &vts[console->index]; -static struct tty_operations console_ops = { - .open = con_open, - .close = con_close, - .write = con_write, - .chars_in_buffer = chars_in_buffer, - .set_termios = set_termios, - .write_room = line_write_room, -}; + if(con_init_done) + down(&line->sem); + console_write_chan(&line->chan_list, string, len); + if(con_init_done) + up(&line->sem); +} static struct tty_driver *console_device(struct console *c, int *index) { @@ -208,22 +218,28 @@ static int console_setup(struct console return(0); } -static struct console stdiocons = INIT_CONSOLE("tty", console_write, - console_device, console_setup, - CON_PRINTBUFFER); +static struct console stdiocons = { + name: "tty", + write: console_write, + device: console_device, + setup: console_setup, + flags: CON_PRINTBUFFER, + index: -1, +}; -static void __init stdio_console_init(void) +static int __init stdio_console_init(void) { INIT_LIST_HEAD(&vts[0].chan_list); list_add(&init_console_chan.list, &vts[0].chan_list); register_console(&stdiocons); + return(0); } + console_initcall(stdio_console_init); static int console_chan_setup(char *str) { - line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1); - return(1); + return(line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1)); } __setup("con", console_chan_setup); --- linux-2.6.2-rc1/arch/um/drivers/tty.c 2003-06-14 12:18:07.000000000 -0700 +++ 25/arch/um/drivers/tty.c 2004-01-21 23:30:29.000000000 -0800 @@ -5,7 +5,6 @@ #include #include -#include #include #include #include "chan_user.h" @@ -30,7 +29,8 @@ void *tty_chan_init(char *str, int devic } str++; - if((data = um_kmalloc(sizeof(*data))) == NULL) + data = um_kmalloc(sizeof(*data)); + if(data == NULL) return(NULL); *data = ((struct tty_chan) { .dev = str, .raw = opts->raw }); --- linux-2.6.2-rc1/arch/um/drivers/ubd_kern.c 2003-11-09 16:45:05.000000000 -0800 +++ 25/arch/um/drivers/ubd_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -8,6 +8,13 @@ * old style ubd by setting UBD_SHIFT to 0 * 2002-09-27...2002-10-18 massive tinkering for 2.5 * partitions have changed in 2.5 + * 2003-01-29 more tinkering for 2.5.59-1 + * This should now address the sysfs problems and has + * the symlink for devfs to allow for booting with + * the common /dev/ubd/discX/... names rather than + * only /dev/ubdN/discN this version also has lots of + * clean ups preparing for ubd-many. + * James McMechan */ #define MAJOR_NR UBD_MAJOR @@ -40,9 +47,12 @@ #include "mconsole_kern.h" #include "init.h" #include "irq_user.h" +#include "irq_kern.h" #include "ubd_user.h" #include "2_5compat.h" #include "os.h" +#include "mem.h" +#include "mem_kern.h" static spinlock_t ubd_io_lock = SPIN_LOCK_UNLOCKED; static spinlock_t ubd_lock = SPIN_LOCK_UNLOCKED; @@ -56,6 +66,10 @@ static int ubd_ioctl(struct inode * inod #define MAX_DEV (8) +/* Changed in early boot */ +static int ubd_do_mmap = 0; +#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE + static struct block_device_operations ubd_blops = { .owner = THIS_MODULE, .open = ubd_open, @@ -67,7 +81,7 @@ static struct block_device_operations ub static request_queue_t *ubd_queue; /* Protected by ubd_lock */ -static int fake_major = 0; +static int fake_major = MAJOR_NR; static struct gendisk *ubd_gendisk[MAX_DEV]; static struct gendisk *fake_gendisk[MAX_DEV]; @@ -96,13 +110,19 @@ struct cow { struct ubd { char *file; - int is_dir; int count; int fd; __u64 size; struct openflags boot_openflags; struct openflags openflags; + int no_cow; struct cow cow; + + int map_writes; + int map_reads; + int nomap_writes; + int nomap_reads; + int write_maps; }; #define DEFAULT_COW { \ @@ -115,21 +135,28 @@ struct ubd { #define DEFAULT_UBD { \ .file = NULL, \ - .is_dir = 0, \ .count = 0, \ .fd = -1, \ .size = -1, \ .boot_openflags = OPEN_FLAGS, \ .openflags = OPEN_FLAGS, \ + .no_cow = 0, \ .cow = DEFAULT_COW, \ + .map_writes = 0, \ + .map_reads = 0, \ + .nomap_writes = 0, \ + .nomap_reads = 0, \ + .write_maps = 0, \ } struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; static int ubd0_init(void) { - if(ubd_dev[0].file == NULL) - ubd_dev[0].file = "root_fs"; + struct ubd *dev = &ubd_dev[0]; + + if(dev->file == NULL) + dev->file = "root_fs"; return(0); } @@ -196,19 +223,46 @@ __uml_help(fake_ide_setup, " Create ide0 entries that map onto ubd devices.\n\n" ); +static int parse_unit(char **ptr) +{ + char *str = *ptr, *end; + int n = -1; + + if(isdigit(*str)) { + n = simple_strtoul(str, &end, 0); + if(end == str) + return(-1); + *ptr = end; + } + else if (('a' <= *str) && (*str <= 'h')) { + n = *str - 'a'; + str++; + *ptr = str; + } + return(n); +} + static int ubd_setup_common(char *str, int *index_out) { + struct ubd *dev; struct openflags flags = global_openflags; char *backing_file; int n, err; if(index_out) *index_out = -1; - n = *str++; + n = *str; if(n == '='){ - static int fake_major_allowed = 1; char *end; int major; + str++; + if(!strcmp(str, "mmap")){ + CHOOSE_MODE(printk("mmap not supported by the ubd " + "driver in tt mode\n"), + ubd_do_mmap = 1); + return(0); + } + if(!strcmp(str, "sync")){ global_openflags.s = 1; return(0); @@ -220,20 +274,14 @@ static int ubd_setup_common(char *str, i return(1); } - if(!fake_major_allowed){ - printk(KERN_ERR "Can't assign a fake major twice\n"); - return(1); - } - err = 1; spin_lock(&ubd_lock); - if(!fake_major_allowed){ + if(fake_major != MAJOR_NR){ printk(KERN_ERR "Can't assign a fake major twice\n"); goto out1; } fake_major = major; - fake_major_allowed = 0; printk(KERN_INFO "Setting extra ubd major number to %d\n", major); @@ -243,25 +291,23 @@ static int ubd_setup_common(char *str, i return(err); } - if(n < '0'){ - printk(KERN_ERR "ubd_setup : index out of range\n"); } - - if((n >= '0') && (n <= '9')) n -= '0'; - else if((n >= 'a') && (n <= 'z')) n -= 'a'; - else { - printk(KERN_ERR "ubd_setup : device syntax invalid\n"); + n = parse_unit(&str); + if(n < 0){ + printk(KERN_ERR "ubd_setup : couldn't parse unit number " + "'%s'\n", str); return(1); } if(n >= MAX_DEV){ - printk(KERN_ERR "ubd_setup : index out of range " - "(%d devices)\n", MAX_DEV); + printk(KERN_ERR "ubd_setup : index %d out of range " + "(%d devices)\n", n, MAX_DEV); return(1); } err = 1; spin_lock(&ubd_lock); - if(ubd_dev[n].file != NULL){ + dev = &ubd_dev[n]; + if(dev->file != NULL){ printk(KERN_ERR "ubd_setup : device already configured\n"); goto out2; } @@ -276,6 +322,11 @@ static int ubd_setup_common(char *str, i flags.s = 1; str++; } + if (*str == 'd'){ + dev->no_cow = 1; + str++; + } + if(*str++ != '='){ printk(KERN_ERR "ubd_setup : Expected '='\n"); goto out2; @@ -284,14 +335,17 @@ static int ubd_setup_common(char *str, i err = 0; backing_file = strchr(str, ','); if(backing_file){ - *backing_file = '\0'; - backing_file++; + if(dev->no_cow) + printk(KERN_ERR "Can't specify both 'd' and a " + "cow file\n"); + else { + *backing_file = '\0'; + backing_file++; + } } - ubd_dev[n].file = str; - if(ubd_is_dir(ubd_dev[n].file)) - ubd_dev[n].is_dir = 1; - ubd_dev[n].cow.file = backing_file; - ubd_dev[n].boot_openflags = flags; + dev->file = str; + dev->cow.file = backing_file; + dev->boot_openflags = flags; out2: spin_unlock(&ubd_lock); return(err); @@ -321,8 +375,7 @@ __uml_help(ubd_setup, static int fakehd_set = 0; static int fakehd(char *str) { - printk(KERN_INFO - "fakehd : Changing ubd name to \"hd\".\n"); + printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n"); fakehd_set = 1; return 1; } @@ -368,32 +421,42 @@ static void ubd_handler(void) { struct io_thread_req req; struct request *rq = elv_next_request(ubd_queue); - int n; + int n, err; do_ubd = NULL; intr_count++; n = read_ubd_fs(thread_fd, &req, sizeof(req)); if(n != sizeof(req)){ printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " - "errno = %d\n", os_getpid(), -n); + "err = %d\n", os_getpid(), -n); spin_lock(&ubd_io_lock); end_request(rq, 0); spin_unlock(&ubd_io_lock); return; } - if((req.offset != ((__u64) (rq->sector)) << 9) || - (req.length != (rq->current_nr_sectors) << 9)) + if((req.op != UBD_MMAP) && + ((req.offset != ((__u64) (rq->sector)) << 9) || + (req.length != (rq->current_nr_sectors) << 9))) panic("I/O op mismatch"); + if(req.map_fd != -1){ + err = physmem_subst_mapping(req.buffer, req.map_fd, + req.map_offset, 1); + if(err) + printk("ubd_handler - physmem_subst_mapping failed, " + "err = %d\n", -err); + } + ubd_finish(rq, req.error); reactivate_fd(thread_fd, UBD_IRQ); do_ubd_request(ubd_queue); } -static void ubd_intr(int irq, void *dev, struct pt_regs *unused) +static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused) { ubd_handler(); + return(IRQ_HANDLED); } /* Only changed by ubd_init, which is an initcall. */ @@ -429,18 +492,20 @@ static void ubd_close(struct ubd *dev) static int ubd_open_dev(struct ubd *dev) { struct openflags flags; - int err, n, create_cow, *create_ptr; + char **back_ptr; + int err, create_cow, *create_ptr; + dev->openflags = dev->boot_openflags; create_cow = 0; create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL; - dev->fd = open_ubd_file(dev->file, &dev->openflags, &dev->cow.file, + back_ptr = dev->no_cow ? NULL : &dev->cow.file; + dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr, &dev->cow.bitmap_offset, &dev->cow.bitmap_len, &dev->cow.data_offset, create_ptr); if((dev->fd == -ENOENT) && create_cow){ - n = dev - ubd_dev; dev->fd = create_cow_file(dev->file, dev->cow.file, - dev->openflags, 1 << 9, + dev->openflags, 1 << 9, PAGE_SIZE, &dev->cow.bitmap_offset, &dev->cow.bitmap_len, &dev->cow.data_offset); @@ -455,13 +520,17 @@ static int ubd_open_dev(struct ubd *dev) if(dev->cow.file != NULL){ err = -ENOMEM; dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len); - if(dev->cow.bitmap == NULL) goto error; + if(dev->cow.bitmap == NULL){ + printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); + goto error; + } flush_tlb_kernel_vm(); err = read_cow_bitmap(dev->fd, dev->cow.bitmap, dev->cow.bitmap_offset, dev->cow.bitmap_len); - if(err) goto error; + if(err < 0) + goto error; flags = dev->openflags; flags.w = 0; @@ -481,17 +550,31 @@ static int ubd_new_disk(int major, u64 s { struct gendisk *disk; + char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")]; + int err; disk = alloc_disk(1 << UBD_SHIFT); - if (!disk) - return -ENOMEM; + if(disk == NULL) + return(-ENOMEM); disk->major = major; disk->first_minor = unit << UBD_SHIFT; disk->fops = &ubd_blops; set_capacity(disk, size / 512); - sprintf(disk->disk_name, "ubd"); - sprintf(disk->devfs_name, "ubd/disc%d", unit); + if(major == MAJOR_NR){ + sprintf(disk->disk_name, "ubd%c", 'a' + unit); + sprintf(disk->devfs_name, "ubd/disc%d", unit); + sprintf(from, "ubd/%d", unit); + sprintf(to, "disc%d/disc", unit); + err = devfs_mk_symlink(from, to); + if(err) + printk("ubd_new_disk failed to make link from %s to " + "%s, error = %d\n", from, to, err); + } + else { + sprintf(disk->disk_name, "ubd_fake%d", unit); + sprintf(disk->devfs_name, "ubd_fake/disc%d", unit); + } disk->private_data = &ubd_dev[unit]; disk->queue = ubd_queue; @@ -506,24 +589,21 @@ static int ubd_add(int n) struct ubd *dev = &ubd_dev[n]; int err; - if(dev->is_dir) - return(-EISDIR); - - if (!dev->file) + if(dev->file == NULL) return(-ENODEV); if (ubd_open_dev(dev)) return(-ENODEV); err = ubd_file_size(dev, &dev->size); - if(err) + if(err < 0) return(err); err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]); if(err) return(err); - if(fake_major) + if(fake_major != MAJOR_NR) ubd_new_disk(fake_major, dev->size, n, &fake_gendisk[n]); @@ -561,42 +641,42 @@ static int ubd_config(char *str) return(err); } -static int ubd_get_config(char *dev, char *str, int size, char **error_out) +static int ubd_get_config(char *name, char *str, int size, char **error_out) { - struct ubd *ubd; + struct ubd *dev; char *end; - int major, n = 0; + int n, len = 0; - major = simple_strtoul(dev, &end, 0); - if((*end != '\0') || (end == dev)){ - *error_out = "ubd_get_config : didn't parse major number"; + n = simple_strtoul(name, &end, 0); + if((*end != '\0') || (end == name)){ + *error_out = "ubd_get_config : didn't parse device number"; return(-1); } - if((major >= MAX_DEV) || (major < 0)){ - *error_out = "ubd_get_config : major number out of range"; + if((n >= MAX_DEV) || (n < 0)){ + *error_out = "ubd_get_config : device number out of range"; return(-1); } - ubd = &ubd_dev[major]; + dev = &ubd_dev[n]; spin_lock(&ubd_lock); - if(ubd->file == NULL){ - CONFIG_CHUNK(str, size, n, "", 1); + if(dev->file == NULL){ + CONFIG_CHUNK(str, size, len, "", 1); goto out; } - CONFIG_CHUNK(str, size, n, ubd->file, 0); + CONFIG_CHUNK(str, size, len, dev->file, 0); - if(ubd->cow.file != NULL){ - CONFIG_CHUNK(str, size, n, ",", 0); - CONFIG_CHUNK(str, size, n, ubd->cow.file, 1); + if(dev->cow.file != NULL){ + CONFIG_CHUNK(str, size, len, ",", 0); + CONFIG_CHUNK(str, size, len, dev->cow.file, 1); } - else CONFIG_CHUNK(str, size, n, "", 1); + else CONFIG_CHUNK(str, size, len, "", 1); out: spin_unlock(&ubd_lock); - return(n); + return(len); } static int ubd_remove(char *str) @@ -604,11 +684,9 @@ static int ubd_remove(char *str) struct ubd *dev; int n, err = -ENODEV; - if(!isdigit(*str)) - return(err); /* it should be a number 0-7/a-h */ + n = parse_unit(&str); - n = *str - '0'; - if(n >= MAX_DEV) + if((n < 0) || (n >= MAX_DEV)) return(err); dev = &ubd_dev[n]; @@ -669,7 +747,7 @@ int ubd_init(void) elevator_init(ubd_queue, &elevator_noop); - if (fake_major != 0) { + if (fake_major != MAJOR_NR) { char name[sizeof("ubd_nnn\0")]; snprintf(name, sizeof(name), "ubd_%d", fake_major); @@ -696,6 +774,7 @@ int ubd_driver_init(void){ io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), &thread_fd); if(io_pid < 0){ + io_pid = -1; printk(KERN_ERR "ubd : Failed to start I/O thread (errno = %d) - " "falling back to synchronous I/O\n", -io_pid); @@ -703,8 +782,8 @@ int ubd_driver_init(void){ } err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, SA_INTERRUPT, "ubd", ubd_dev); - if(err != 0) printk(KERN_ERR - "um_request_irq failed - errno = %d\n", -err); + if(err != 0) + printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); return(err); } @@ -714,15 +793,9 @@ static int ubd_open(struct inode *inode, { struct gendisk *disk = inode->i_bdev->bd_disk; struct ubd *dev = disk->private_data; - int err = -EISDIR; - - if(dev->is_dir == 1) - goto out; + int err = 0; - err = 0; if(dev->count == 0){ - dev->openflags = dev->boot_openflags; - err = ubd_open_dev(dev); if(err){ printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", @@ -749,62 +822,156 @@ static int ubd_release(struct inode * in return(0); } -void cowify_req(struct io_thread_req *req, struct ubd *dev) +static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, + __u64 *cow_offset, unsigned long *bitmap, + __u64 bitmap_offset, unsigned long *bitmap_words, + __u64 bitmap_len) +{ + __u64 sector = io_offset >> 9; + int i, update_bitmap = 0; + + for(i = 0; i < length >> 9; i++){ + if(cow_mask != NULL) + ubd_set_bit(i, (unsigned char *) cow_mask); + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + continue; + + update_bitmap = 1; + ubd_set_bit(sector + i, (unsigned char *) bitmap); + } + + if(!update_bitmap) + return; + + *cow_offset = sector / (sizeof(unsigned long) * 8); + + /* This takes care of the case where we're exactly at the end of the + * device, and *cow_offset + 1 is off the end. So, just back it up + * by one word. Thanks to Lynn Kerby for the fix and James McMechan + * for the original diagnosis. + */ + if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / + sizeof(unsigned long) - 1)) + (*cow_offset)--; + + bitmap_words[0] = bitmap[*cow_offset]; + bitmap_words[1] = bitmap[*cow_offset + 1]; + + *cow_offset *= sizeof(unsigned long); + *cow_offset += bitmap_offset; +} + +static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, + __u64 bitmap_offset, __u64 bitmap_len) { - int i, update_bitmap, sector = req->offset >> 9; + __u64 sector = req->offset >> 9; + int i; if(req->length > (sizeof(req->sector_mask) * 8) << 9) panic("Operation too long"); + if(req->op == UBD_READ) { for(i = 0; i < req->length >> 9; i++){ - if(ubd_test_bit(sector + i, (unsigned char *) - dev->cow.bitmap)){ + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) ubd_set_bit(i, (unsigned char *) &req->sector_mask); - } } - } - else { - update_bitmap = 0; - for(i = 0; i < req->length >> 9; i++){ - ubd_set_bit(i, (unsigned char *) - &req->sector_mask); - if(!ubd_test_bit(sector + i, (unsigned char *) - dev->cow.bitmap)) - update_bitmap = 1; - ubd_set_bit(sector + i, (unsigned char *) - dev->cow.bitmap); - } - if(update_bitmap){ - req->cow_offset = sector / (sizeof(unsigned long) * 8); - req->bitmap_words[0] = - dev->cow.bitmap[req->cow_offset]; - req->bitmap_words[1] = - dev->cow.bitmap[req->cow_offset + 1]; - req->cow_offset *= sizeof(unsigned long); - req->cow_offset += dev->cow.bitmap_offset; + } + else cowify_bitmap(req->offset, req->length, &req->sector_mask, + &req->cow_offset, bitmap, bitmap_offset, + req->bitmap_words, bitmap_len); +} + +static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset) +{ + __u64 sector; + unsigned char *bitmap; + int bit, i; + + /* mmap must have been requested on the command line */ + if(!ubd_do_mmap) + return(-1); + + /* The buffer must be page aligned */ + if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0) + return(-1); + + /* The request must be a page long */ + if((req->current_nr_sectors << 9) != PAGE_SIZE) + return(-1); + + if(dev->cow.file == NULL) + return(dev->fd); + + sector = offset >> 9; + bitmap = (unsigned char *) dev->cow.bitmap; + bit = ubd_test_bit(sector, bitmap); + + for(i = 1; i < req->current_nr_sectors; i++){ + if(ubd_test_bit(sector + i, bitmap) != bit) + return(-1); + } + + if(bit || (rq_data_dir(req) == WRITE)) + offset += dev->cow.data_offset; + + /* The data on disk must be page aligned */ + if((offset % UBD_MMAP_BLOCK_SIZE) != 0) + return(-1); + + return(bit ? dev->fd : dev->cow.fd); +} + +static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset, + struct request *req, + struct io_thread_req *io_req) +{ + int err; + + if(rq_data_dir(req) == WRITE){ + /* Writes are almost no-ops since the new data is already in the + * host page cache + */ + dev->map_writes++; + if(dev->cow.file != NULL) + cowify_bitmap(io_req->offset, io_req->length, + &io_req->sector_mask, &io_req->cow_offset, + dev->cow.bitmap, dev->cow.bitmap_offset, + io_req->bitmap_words, + dev->cow.bitmap_len); + } + else { + int w; + + if((dev->cow.file != NULL) && (fd == dev->cow.fd)) + w = 0; + else w = dev->openflags.w; + + if((dev->cow.file != NULL) && (fd == dev->fd)) + offset += dev->cow.data_offset; + + err = physmem_subst_mapping(req->buffer, fd, offset, w); + if(err){ + printk("physmem_subst_mapping failed, err = %d\n", + -err); + return(1); } + dev->map_reads++; } + io_req->op = UBD_MMAP; + io_req->buffer = req->buffer; + return(0); } static int prepare_request(struct request *req, struct io_thread_req *io_req) { struct gendisk *disk = req->rq_disk; struct ubd *dev = disk->private_data; - __u64 block; - int nsect; + __u64 offset; + int len, fd; if(req->rq_status == RQ_INACTIVE) return(1); - if(dev->is_dir){ - strcpy(req->buffer, "HOSTFS:"); - strcat(req->buffer, dev->file); - spin_lock(&ubd_io_lock); - end_request(req, 1); - spin_unlock(&ubd_io_lock); - return(1); - } - if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ printk("Write attempted on readonly ubd device %s\n", disk->disk_name); @@ -814,23 +981,49 @@ static int prepare_request(struct reques return(1); } - block = req->sector; - nsect = req->current_nr_sectors; + offset = ((__u64) req->sector) << 9; + len = req->current_nr_sectors << 9; - io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE; io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; io_req->fds[1] = dev->fd; + io_req->map_fd = -1; + io_req->cow_offset = -1; + io_req->offset = offset; + io_req->length = len; + io_req->error = 0; + io_req->sector_mask = 0; + + fd = mmap_fd(req, dev, io_req->offset); + if(fd > 0){ + /* If mmapping is otherwise OK, but the first access to the + * page is a write, then it's not mapped in yet. So we have + * to write the data to disk first, then we can map the disk + * page in and continue normally from there. + */ + if((rq_data_dir(req) == WRITE) && !is_remapped(req->buffer)){ + io_req->map_fd = dev->fd; + io_req->map_offset = io_req->offset + + dev->cow.data_offset; + dev->write_maps++; + } + else return(prepare_mmap_request(dev, fd, io_req->offset, req, + io_req)); + } + + if(rq_data_dir(req) == READ) + dev->nomap_reads++; + else dev->nomap_writes++; + + io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; - io_req->offset = ((__u64) block) << 9; - io_req->length = nsect << 9; io_req->buffer = req->buffer; io_req->sectorsize = 1 << 9; - io_req->sector_mask = 0; - io_req->cow_offset = -1; - io_req->error = 0; - if(dev->cow.file != NULL) cowify_req(io_req, dev); + if(dev->cow.file != NULL) + cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset, + dev->cow.bitmap_len); + return(0); } @@ -885,7 +1078,7 @@ static int ubd_ioctl(struct inode * inod g.heads = 128; g.sectors = 32; g.cylinders = dev->size / (128 * 32 * 512); - g.start = 2; + g.start = get_start_sect(inode->i_bdev); return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0); case HDIO_SET_UNMASKINTR: @@ -935,6 +1128,142 @@ static int ubd_ioctl(struct inode * inod return(-EINVAL); } +static int ubd_check_remapped(int fd, unsigned long address, int is_write, + __u64 offset) +{ + __u64 bitmap_offset; + unsigned long new_bitmap[2]; + int i, err, n; + + /* If it's not a write access, we can't do anything about it */ + if(!is_write) + return(0); + + /* We have a write */ + for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){ + struct ubd *dev = &ubd_dev[i]; + + if((dev->fd != fd) && (dev->cow.fd != fd)) + continue; + + /* It's a write to a ubd device */ + + if(!dev->openflags.w){ + /* It's a write access on a read-only device - probably + * shouldn't happen. If the kernel is trying to change + * something with no intention of writing it back out, + * then this message will clue us in that this needs + * fixing + */ + printk("Write access to mapped page from readonly ubd " + "device %d\n", i); + return(0); + } + + /* It's a write to a writeable ubd device - it must be COWed + * because, otherwise, the page would have been mapped in + * writeable + */ + + if(!dev->cow.file) + panic("Write fault on writeable non-COW ubd device %d", + i); + + /* It should also be an access to the backing file since the + * COW pages should be mapped in read-write + */ + + if(fd == dev->fd) + panic("Write fault on a backing page of ubd " + "device %d\n", i); + + /* So, we do the write, copying the backing data to the COW + * file... + */ + + err = os_seek_file(dev->fd, offset + dev->cow.data_offset); + if(err < 0) + panic("Couldn't seek to %lld in COW file of ubd " + "device %d, err = %d", + offset + dev->cow.data_offset, i, -err); + + n = os_write_file(dev->fd, (void *) address, PAGE_SIZE); + if(n != PAGE_SIZE) + panic("Couldn't copy data to COW file of ubd " + "device %d, err = %d", i, -n); + + /* ... updating the COW bitmap... */ + + cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset, + dev->cow.bitmap, dev->cow.bitmap_offset, + new_bitmap, dev->cow.bitmap_len); + + err = os_seek_file(dev->fd, bitmap_offset); + if(err < 0) + panic("Couldn't seek to %lld in COW file of ubd " + "device %d, err = %d", bitmap_offset, i, -err); + + n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap)); + if(n != sizeof(new_bitmap)) + panic("Couldn't update bitmap of ubd device %d, " + "err = %d", i, -n); + + /* Maybe we can map the COW page in, and maybe we can't. If + * it is a pre-V3 COW file, we can't, since the alignment will + * be wrong. If it is a V3 or later COW file which has been + * moved to a system with a larger page size, then maybe we + * can't, depending on the exact location of the page. + */ + + offset += dev->cow.data_offset; + + /* Remove the remapping, putting the original anonymous page + * back. If the COW file can be mapped in, that is done. + * Otherwise, the COW page is read in. + */ + + if(!physmem_remove_mapping((void *) address)) + panic("Address 0x%lx not remapped by ubd device %d", + address, i); + if((offset % UBD_MMAP_BLOCK_SIZE) == 0) + physmem_subst_mapping((void *) address, dev->fd, + offset, 1); + else { + err = os_seek_file(dev->fd, offset); + if(err < 0) + panic("Couldn't seek to %lld in COW file of " + "ubd device %d, err = %d", offset, i, + -err); + + n = os_read_file(dev->fd, (void *) address, PAGE_SIZE); + if(n != PAGE_SIZE) + panic("Failed to read page from offset %llx of " + "COW file of ubd device %d, err = %d", + offset, i, -n); + } + + return(1); + } + + /* It's not a write on a ubd device */ + return(0); +} + +static struct remapper ubd_remapper = { + .list = LIST_HEAD_INIT(ubd_remapper.list), + .proc = ubd_check_remapped, +}; + +static int ubd_remapper_setup(void) +{ + if(ubd_do_mmap) + register_remapper(&ubd_remapper); + + return(0); +} + +__initcall(ubd_remapper_setup); + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- linux-2.6.2-rc1/arch/um/drivers/ubd_user.c 2003-06-14 12:18:04.000000000 -0700 +++ 25/arch/um/drivers/ubd_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -11,11 +11,8 @@ #include #include #include -#include #include -#include #include -#include #include #include #include "asm/types.h" @@ -24,146 +21,30 @@ #include "user.h" #include "ubd_user.h" #include "os.h" +#include "cow.h" #include #include -#if __BYTE_ORDER == __BIG_ENDIAN -# define ntohll(x) (x) -# define htonll(x) (x) -#elif __BYTE_ORDER == __LITTLE_ENDIAN -# define ntohll(x) bswap_64(x) -# define htonll(x) bswap_64(x) -#else -#error "__BYTE_ORDER not defined" -#endif - -#define PATH_LEN_V1 256 - -struct cow_header_v1 { - int magic; - int version; - char backing_file[PATH_LEN_V1]; - time_t mtime; - __u64 size; - int sectorsize; -}; - -#define PATH_LEN_V2 MAXPATHLEN - -struct cow_header_v2 { - unsigned long magic; - unsigned long version; - char backing_file[PATH_LEN_V2]; - time_t mtime; - __u64 size; - int sectorsize; -}; - -union cow_header { - struct cow_header_v1 v1; - struct cow_header_v2 v2; -}; - -#define COW_MAGIC 0x4f4f4f4d /* MOOO */ -#define COW_VERSION 2 - -static void sizes(__u64 size, int sectorsize, int bitmap_offset, - unsigned long *bitmap_len_out, int *data_offset_out) -{ - *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); - - *data_offset_out = bitmap_offset + *bitmap_len_out; - *data_offset_out = (*data_offset_out + sectorsize - 1) / sectorsize; - *data_offset_out *= sectorsize; -} - -static int read_cow_header(int fd, int *magic_out, char **backing_file_out, - time_t *mtime_out, __u64 *size_out, - int *sectorsize_out, int *bitmap_offset_out) -{ - union cow_header *header; - char *file; - int err, n; - unsigned long version, magic; - - header = um_kmalloc(sizeof(*header)); - if(header == NULL){ - printk("read_cow_header - Failed to allocate header\n"); - return(-ENOMEM); - } - err = -EINVAL; - n = read(fd, header, sizeof(*header)); - if(n < offsetof(typeof(header->v1), backing_file)){ - printk("read_cow_header - short header\n"); - goto out; - } - - magic = header->v1.magic; - if(magic == COW_MAGIC) { - version = header->v1.version; - } - else if(magic == ntohl(COW_MAGIC)){ - version = ntohl(header->v1.version); - } - else goto out; - - *magic_out = COW_MAGIC; - - if(version == 1){ - if(n < sizeof(header->v1)){ - printk("read_cow_header - failed to read V1 header\n"); - goto out; - } - *mtime_out = header->v1.mtime; - *size_out = header->v1.size; - *sectorsize_out = header->v1.sectorsize; - *bitmap_offset_out = sizeof(header->v1); - file = header->v1.backing_file; - } - else if(version == 2){ - if(n < sizeof(header->v2)){ - printk("read_cow_header - failed to read V2 header\n"); - goto out; - } - *mtime_out = ntohl(header->v2.mtime); - *size_out = ntohll(header->v2.size); - *sectorsize_out = ntohl(header->v2.sectorsize); - *bitmap_offset_out = sizeof(header->v2); - file = header->v2.backing_file; - } - else { - printk("read_cow_header - invalid COW version\n"); - goto out; - } - err = -ENOMEM; - *backing_file_out = uml_strdup(file); - if(*backing_file_out == NULL){ - printk("read_cow_header - failed to allocate backing file\n"); - goto out; - } - err = 0; - out: - kfree(header); - return(err); -} static int same_backing_files(char *from_cmdline, char *from_cow, char *cow) { - struct stat buf1, buf2; + struct uml_stat buf1, buf2; + int err; if(from_cmdline == NULL) return(1); if(!strcmp(from_cmdline, from_cow)) return(1); - if(stat(from_cmdline, &buf1) < 0){ - printk("Couldn't stat '%s', errno = %d\n", from_cmdline, - errno); + err = os_stat_file(from_cmdline, &buf1); + if(err < 0){ + printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err); return(1); } - if(stat(from_cow, &buf2) < 0){ - printk("Couldn't stat '%s', errno = %d\n", from_cow, errno); + err = os_stat_file(from_cow, &buf2); + if(err < 0){ + printk("Couldn't stat '%s', err = %d\n", from_cow, -err); return(1); } - if((buf1.st_dev == buf2.st_dev) && (buf1.st_ino == buf2.st_ino)) + if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) return(1); printk("Backing file mismatch - \"%s\" requested,\n" @@ -174,20 +55,21 @@ static int same_backing_files(char *from static int backing_file_mismatch(char *file, __u64 size, time_t mtime) { - struct stat64 buf; + unsigned long modtime; long long actual; int err; - if(stat64(file, &buf) < 0){ - printk("Failed to stat backing file \"%s\", errno = %d\n", - file, errno); - return(-errno); + err = os_file_modtime(file, &modtime); + if(err < 0){ + printk("Failed to get modification time of backing file " + "\"%s\", err = %d\n", file, -err); + return(err); } err = os_file_size(file, &actual); - if(err){ + if(err < 0){ printk("Failed to get size of backing file \"%s\", " - "errno = %d\n", file, -err); + "err = %d\n", file, -err); return(err); } @@ -196,9 +78,9 @@ static int backing_file_mismatch(char *f "file\n", size, actual); return(-EINVAL); } - if(buf.st_mtime != mtime){ + if(modtime != mtime){ printk("mtime mismatch (%ld vs %ld) of COW header vs backing " - "file\n", mtime, buf.st_mtime); + "file\n", mtime, modtime); return(-EINVAL); } return(0); @@ -209,124 +91,16 @@ int read_cow_bitmap(int fd, void *buf, i int err; err = os_seek_file(fd, offset); - if(err != 0) return(-errno); - err = read(fd, buf, len); - if(err < 0) return(-errno); - return(0); -} + if(err < 0) + return(err); -static int absolutize(char *to, int size, char *from) -{ - char save_cwd[256], *slash; - int remaining; + err = os_read_file(fd, buf, len); + if(err < 0) + return(err); - if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { - printk("absolutize : unable to get cwd - errno = %d\n", errno); - return(-1); - } - slash = strrchr(from, '/'); - if(slash != NULL){ - *slash = '\0'; - if(chdir(from)){ - *slash = '/'; - printk("absolutize : Can't cd to '%s' - errno = %d\n", - from, errno); - return(-1); - } - *slash = '/'; - if(getcwd(to, size) == NULL){ - printk("absolutize : unable to get cwd of '%s' - " - "errno = %d\n", from, errno); - return(-1); - } - remaining = size - strlen(to); - if(strlen(slash) + 1 > remaining){ - printk("absolutize : unable to fit '%s' into %d " - "chars\n", from, size); - return(-1); - } - strcat(to, slash); - } - else { - if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ - printk("absolutize : unable to fit '%s' into %d " - "chars\n", from, size); - return(-1); - } - strcpy(to, save_cwd); - strcat(to, "/"); - strcat(to, from); - } - chdir(save_cwd); return(0); } -static int write_cow_header(char *cow_file, int fd, char *backing_file, - int sectorsize, long long *size) -{ - struct cow_header_v2 *header; - struct stat64 buf; - int err; - - err = os_seek_file(fd, 0); - if(err != 0){ - printk("write_cow_header - lseek failed, errno = %d\n", errno); - return(-errno); - } - - err = -ENOMEM; - header = um_kmalloc(sizeof(*header)); - if(header == NULL){ - printk("Failed to allocate COW V2 header\n"); - goto out; - } - header->magic = htonl(COW_MAGIC); - header->version = htonl(COW_VERSION); - - err = -EINVAL; - if(strlen(backing_file) > sizeof(header->backing_file) - 1){ - printk("Backing file name \"%s\" is too long - names are " - "limited to %d characters\n", backing_file, - sizeof(header->backing_file) - 1); - goto out_free; - } - - if(absolutize(header->backing_file, sizeof(header->backing_file), - backing_file)) - goto out_free; - - err = stat64(header->backing_file, &buf); - if(err < 0){ - printk("Stat of backing file '%s' failed, errno = %d\n", - header->backing_file, errno); - err = -errno; - goto out_free; - } - - err = os_file_size(header->backing_file, size); - if(err){ - printk("Couldn't get size of backing file '%s', errno = %d\n", - header->backing_file, -*size); - goto out_free; - } - - header->mtime = htonl(buf.st_mtime); - header->size = htonll(*size); - header->sectorsize = htonl(sectorsize); - - err = write(fd, header, sizeof(*header)); - if(err != sizeof(*header)){ - printk("Write of header to new COW file '%s' failed, " - "errno = %d\n", cow_file, errno); - goto out_free; - } - err = 0; - out_free: - kfree(header); - out: - return(err); -} - int open_ubd_file(char *file, struct openflags *openflags, char **backing_file_out, int *bitmap_offset_out, unsigned long *bitmap_len_out, int *data_offset_out, @@ -334,26 +108,36 @@ int open_ubd_file(char *file, struct ope { time_t mtime; __u64 size; + __u32 version, align; char *backing_file; - int fd, err, sectorsize, magic, same, mode = 0644; + int fd, err, sectorsize, same, mode = 0644; - if((fd = os_open_file(file, *openflags, mode)) < 0){ + fd = os_open_file(file, *openflags, mode); + if(fd < 0){ if((fd == -ENOENT) && (create_cow_out != NULL)) *create_cow_out = 1; if(!openflags->w || ((errno != EROFS) && (errno != EACCES))) return(-errno); openflags->w = 0; - if((fd = os_open_file(file, *openflags, mode)) < 0) + fd = os_open_file(file, *openflags, mode); + if(fd < 0) return(fd); } + + err = os_lock_file(fd, openflags->w); + if(err < 0){ + printk("Failed to lock '%s', err = %d\n", file, -err); + goto out_close; + } + if(backing_file_out == NULL) return(fd); - err = read_cow_header(fd, &magic, &backing_file, &mtime, &size, - §orsize, bitmap_offset_out); + err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, + &size, §orsize, &align, bitmap_offset_out); if(err && (*backing_file_out != NULL)){ printk("Failed to read COW header from COW file \"%s\", " - "errno = %d\n", file, err); - goto error; + "errno = %d\n", file, -err); + goto out_close; } if(err) return(fd); @@ -363,36 +147,33 @@ int open_ubd_file(char *file, struct ope if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){ printk("Switching backing file to '%s'\n", *backing_file_out); - err = write_cow_header(file, fd, *backing_file_out, - sectorsize, &size); + err = write_cow_header(file, fd, *backing_file_out, + sectorsize, align, &size); if(err){ - printk("Switch failed, errno = %d\n", err); + printk("Switch failed, errno = %d\n", -err); return(err); } } else { *backing_file_out = backing_file; err = backing_file_mismatch(*backing_file_out, size, mtime); - if(err) goto error; + if(err) goto out_close; } - sizes(size, sectorsize, *bitmap_offset_out, bitmap_len_out, - data_offset_out); + cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, + bitmap_len_out, data_offset_out); return(fd); - error: - close(fd); + out_close: + os_close_file(fd); return(err); } int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, - int sectorsize, int *bitmap_offset_out, + int sectorsize, int alignment, int *bitmap_offset_out, unsigned long *bitmap_len_out, int *data_offset_out) { - __u64 blocks; - long zero; - int err, fd, i; - long long size; + int err, fd; flags.c = 1; fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL); @@ -403,57 +184,49 @@ int create_cow_file(char *cow_file, char goto out; } - err = write_cow_header(cow_file, fd, backing_file, sectorsize, &size); - if(err) goto out_close; - - blocks = (size + sectorsize - 1) / sectorsize; - blocks = (blocks + sizeof(long) * 8 - 1) / (sizeof(long) * 8); - zero = 0; - for(i = 0; i < blocks; i++){ - err = write(fd, &zero, sizeof(zero)); - if(err != sizeof(zero)){ - printk("Write of bitmap to new COW file '%s' failed, " - "errno = %d\n", cow_file, errno); - goto out_close; - } - } - - sizes(size, sectorsize, sizeof(struct cow_header_v2), - bitmap_len_out, data_offset_out); - *bitmap_offset_out = sizeof(struct cow_header_v2); - - return(fd); - - out_close: - close(fd); + err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, + bitmap_offset_out, bitmap_len_out, + data_offset_out); + if(!err) + return(fd); + os_close_file(fd); out: return(err); } +/* XXX Just trivial wrappers around os_read_file and os_write_file */ int read_ubd_fs(int fd, void *buffer, int len) { - int n; - - n = read(fd, buffer, len); - if(n < 0) return(-errno); - else return(n); + return(os_read_file(fd, buffer, len)); } int write_ubd_fs(int fd, char *buffer, int len) { - int n; - - n = write(fd, buffer, len); - if(n < 0) return(-errno); - else return(n); + return(os_write_file(fd, buffer, len)); } -int ubd_is_dir(char *file) +static int update_bitmap(struct io_thread_req *req) { - struct stat64 buf; + int n; + + if(req->cow_offset == -1) + return(0); + + n = os_seek_file(req->fds[1], req->cow_offset); + if(n < 0){ + printk("do_io - bitmap lseek failed : err = %d\n", -n); + return(1); + } + + n = os_write_file(req->fds[1], &req->bitmap_words, + sizeof(req->bitmap_words)); + if(n != sizeof(req->bitmap_words)){ + printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, + req->fds[1]); + return(1); + } - if(stat64(file, &buf) < 0) return(0); - return(S_ISDIR(buf.st_mode)); + return(0); } void do_io(struct io_thread_req *req) @@ -461,8 +234,18 @@ void do_io(struct io_thread_req *req) char *buf; unsigned long len; int n, nsectors, start, end, bit; + int err; __u64 off; + if(req->op == UBD_MMAP){ + /* Touch the page to force the host to do any necessary IO to + * get it into memory + */ + n = *((volatile int *) req->buffer); + req->error = update_bitmap(req); + return; + } + nsectors = req->length / req->sectorsize; start = 0; do { @@ -473,15 +256,14 @@ void do_io(struct io_thread_req *req) &req->sector_mask) == bit)) end++; - if(end != nsectors) - printk("end != nsectors\n"); off = req->offset + req->offsets[bit] + start * req->sectorsize; len = (end - start) * req->sectorsize; buf = &req->buffer[start * req->sectorsize]; - if(os_seek_file(req->fds[bit], off) != 0){ - printk("do_io - lseek failed : errno = %d\n", errno); + err = os_seek_file(req->fds[bit], off); + if(err < 0){ + printk("do_io - lseek failed : err = %d\n", -err); req->error = 1; return; } @@ -490,11 +272,10 @@ void do_io(struct io_thread_req *req) do { buf = &buf[n]; len -= n; - n = read(req->fds[bit], buf, len); + n = os_read_file(req->fds[bit], buf, len); if (n < 0) { - printk("do_io - read returned %d : " - "errno = %d fd = %d\n", n, - errno, req->fds[bit]); + printk("do_io - read failed, err = %d " + "fd = %d\n", -n, req->fds[bit]); req->error = 1; return; } @@ -502,11 +283,10 @@ void do_io(struct io_thread_req *req) if (n < len) memset(&buf[n], 0, len - n); } else { - n = write(req->fds[bit], buf, len); + n = os_write_file(req->fds[bit], buf, len); if(n != len){ - printk("do_io - write returned %d : " - "errno = %d fd = %d\n", n, - errno, req->fds[bit]); + printk("do_io - write failed err = %d " + "fd = %d\n", -n, req->fds[bit]); req->error = 1; return; } @@ -515,24 +295,7 @@ void do_io(struct io_thread_req *req) start = end; } while(start < nsectors); - if(req->cow_offset != -1){ - if(os_seek_file(req->fds[1], req->cow_offset) != 0){ - printk("do_io - bitmap lseek failed : errno = %d\n", - errno); - req->error = 1; - return; - } - n = write(req->fds[1], &req->bitmap_words, - sizeof(req->bitmap_words)); - if(n != sizeof(req->bitmap_words)){ - printk("do_io - bitmap update returned %d : " - "errno = %d fd = %d\n", n, errno, req->fds[1]); - req->error = 1; - return; - } - } - req->error = 0; - return; + req->error = update_bitmap(req); } /* Changed in start_io_thread, which is serialized by being called only @@ -550,19 +313,23 @@ int io_thread(void *arg) signal(SIGWINCH, SIG_IGN); while(1){ - n = read(kernel_fd, &req, sizeof(req)); - if(n < 0) printk("io_thread - read returned %d, errno = %d\n", - n, errno); - else if(n < sizeof(req)){ - printk("io_thread - short read : length = %d\n", n); + n = os_read_file(kernel_fd, &req, sizeof(req)); + if(n != sizeof(req)){ + if(n < 0) + printk("io_thread - read failed, fd = %d, " + "err = %d\n", kernel_fd, -n); + else { + printk("io_thread - short read, fd = %d, " + "length = %d\n", kernel_fd, n); + } continue; } io_count++; do_io(&req); - n = write(kernel_fd, &req, sizeof(req)); + n = os_write_file(kernel_fd, &req, sizeof(req)); if(n != sizeof(req)) - printk("io_thread - write failed, errno = %d\n", - errno); + printk("io_thread - write failed, fd = %d, err = %d\n", + kernel_fd, -n); } } @@ -571,10 +338,11 @@ int start_io_thread(unsigned long sp, in int pid, fds[2], err; err = os_pipe(fds, 1, 1); - if(err){ - printk("start_io_thread - os_pipe failed, errno = %d\n", -err); - return(-1); + if(err < 0){ + printk("start_io_thread - os_pipe failed, err = %d\n", -err); + goto out; } + kernel_fd = fds[0]; *fd_out = fds[1]; @@ -582,32 +350,19 @@ int start_io_thread(unsigned long sp, in NULL); if(pid < 0){ printk("start_io_thread - clone failed : errno = %d\n", errno); - return(-errno); + goto out_close; } - return(pid); -} - -#ifdef notdef -int start_io_thread(unsigned long sp, int *fd_out) -{ - int pid; - if((kernel_fd = get_pty()) < 0) return(-1); - raw(kernel_fd, 0); - if((*fd_out = open(ptsname(kernel_fd), O_RDWR)) < 0){ - printk("Couldn't open tty for IO\n"); - return(-1); - } - - pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, - NULL); - if(pid < 0){ - printk("start_io_thread - clone failed : errno = %d\n", errno); - return(-errno); - } return(pid); + + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + kernel_fd = -1; + *fd_out = -1; + out: + return(err); } -#endif /* * Overrides for Emacs so that we follow Linus's tabbing style. --- linux-2.6.2-rc1/arch/um/drivers/xterm.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/drivers/xterm.c 2004-01-21 23:30:29.000000000 -0800 @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -36,7 +35,8 @@ void *xterm_init(char *str, int device, { struct xterm_chan *data; - if((data = malloc(sizeof(*data))) == NULL) return(NULL); + data = malloc(sizeof(*data)); + if(data == NULL) return(NULL); *data = ((struct xterm_chan) { .pid = -1, .helper_pid = -1, .device = device, @@ -93,7 +93,7 @@ int xterm_open(int input, int output, in "/usr/lib/uml/port-helper", "-uml-socket", file, NULL }; - if(access(argv[4], X_OK)) + if(os_access(argv[4], OS_ACC_X_OK) < 0) argv[4] = "port-helper"; fd = mkstemp(file); @@ -106,13 +106,13 @@ int xterm_open(int input, int output, in printk("xterm_open : unlink failed, errno = %d\n", errno); return(-errno); } - close(fd); + os_close_file(fd); - fd = create_unix_socket(file, sizeof(file)); + fd = os_create_unix_socket(file, sizeof(file), 1); if(fd < 0){ printk("xterm_open : create_unix_socket failed, errno = %d\n", -fd); - return(-fd); + return(fd); } sprintf(title, data->title, data->device); @@ -128,15 +128,16 @@ int xterm_open(int input, int output, in if(data->direct_rcv) new = os_rcv_fd(fd, &data->helper_pid); else { - if((err = os_set_fd_block(fd, 0)) != 0){ + err = os_set_fd_block(fd, 0); + if(err < 0){ printk("xterm_open : failed to set descriptor " - "non-blocking, errno = %d\n", err); + "non-blocking, err = %d\n", -err); return(err); } new = xterm_fd(fd, &data->helper_pid); } if(new < 0){ - printk("xterm_open : os_rcv_fd failed, errno = %d\n", -new); + printk("xterm_open : os_rcv_fd failed, err = %d\n", -new); goto out; } @@ -160,7 +161,7 @@ void xterm_close(int fd, void *d) if(data->helper_pid != -1) os_kill_process(data->helper_pid, 0); data->helper_pid = -1; - close(fd); + os_close_file(fd); } void xterm_free(void *d) --- linux-2.6.2-rc1/arch/um/drivers/xterm_kern.c 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/drivers/xterm_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -5,9 +5,12 @@ #include "linux/errno.h" #include "linux/slab.h" +#include "linux/signal.h" +#include "linux/interrupt.h" #include "asm/semaphore.h" #include "asm/irq.h" #include "irq_user.h" +#include "irq_kern.h" #include "kern_util.h" #include "os.h" #include "xterm.h" @@ -19,17 +22,18 @@ struct xterm_wait { int new_fd; }; -static void xterm_interrupt(int irq, void *data, struct pt_regs *regs) +static irqreturn_t xterm_interrupt(int irq, void *data, struct pt_regs *regs) { struct xterm_wait *xterm = data; int fd; fd = os_rcv_fd(xterm->fd, &xterm->pid); if(fd == -EAGAIN) - return; + return(IRQ_NONE); xterm->new_fd = fd; up(&xterm->sem); + return(IRQ_HANDLED); } int xterm_fd(int socket, int *pid_out) @@ -54,7 +58,8 @@ int xterm_fd(int socket, int *pid_out) if(err){ printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, " "err = %d\n", err); - return(err); + ret = err; + goto out; } down(&data->sem); @@ -62,6 +67,7 @@ int xterm_fd(int socket, int *pid_out) ret = data->new_fd; *pid_out = data->pid; + out: kfree(data); return(ret); --- linux-2.6.2-rc1/arch/um/dyn.lds.S 2003-06-14 12:18:21.000000000 -0700 +++ 25/arch/um/dyn.lds.S 2004-01-21 23:30:29.000000000 -0800 @@ -10,12 +10,15 @@ SECTIONS { . = START + SIZEOF_HEADERS; .interp : { *(.interp) } - . = ALIGN(4096); __binary_start = .; . = ALIGN(4096); /* Init code and data */ _stext = .; __init_begin = .; - .text.init : { *(.text.init) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } . = ALIGN(4096); @@ -67,7 +70,7 @@ SECTIONS #include "asm/common.lds.S" - .data.init : { *(.data.init) } + init.data : { *(.init.data) } /* Ensure the __preinit_array_start label is properly aligned. We could instead move the label definition inside the section, but --- linux-2.6.2-rc1/arch/um/include/2_5compat.h 2003-06-14 12:18:09.000000000 -0700 +++ 25/arch/um/include/2_5compat.h 2004-01-21 23:30:29.000000000 -0800 @@ -6,20 +6,6 @@ #ifndef __2_5_COMPAT_H__ #define __2_5_COMPAT_H__ -#include "linux/version.h" - -#define INIT_CONSOLE(dev_name, write_proc, device_proc, setup_proc, f) { \ - name : dev_name, \ - write : write_proc, \ - read : NULL, \ - device : device_proc, \ - setup : setup_proc, \ - flags : f, \ - index : -1, \ - cflag : 0, \ - next : NULL \ -} - #define INIT_HARDSECT(arr, maj, sizes) #define SET_PRI(task) do ; while(0) --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/include/irq_kern.h 2004-01-21 23:30:29.000000000 -0800 @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __IRQ_KERN_H__ +#define __IRQ_KERN_H__ + +#include "linux/interrupt.h" + +extern int um_request_irq(unsigned int irq, int fd, int type, + irqreturn_t (*handler)(int, void *, + struct pt_regs *), + unsigned long irqflags, const char * devname, + void *dev_id); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.2-rc1/arch/um/include/kern_util.h 2003-06-14 12:18:07.000000000 -0700 +++ 25/arch/um/include/kern_util.h 2004-01-21 23:30:29.000000000 -0800 @@ -63,10 +63,9 @@ extern void init_flush_vm(void); extern void *syscall_sp(void *t); extern void syscall_trace(void); extern int hz(void); -extern void idle_timer(void); +extern void uml_idle_timer(void); extern unsigned int do_IRQ(int irq, union uml_pt_regs *regs); extern int external_pid(void *t); -extern int pid_to_processor_id(int pid); extern void boot_timer_handler(int sig); extern void interrupt_end(void); extern void initial_thread_cb(void (*proc)(void *), void *arg); @@ -90,9 +89,7 @@ extern int remove_gdb(void); extern char *uml_strdup(char *string); extern void unprotect_kernel_mem(void); extern void protect_kernel_mem(void); -extern void set_kmem_end(unsigned long); extern void uml_cleanup(void); -extern int pid_to_processor_id(int pid); extern void set_current(void *t); extern void lock_signalled_task(void *t); extern void IPI_handler(int cpu); @@ -101,7 +98,9 @@ extern void *get_init_task(void); extern int clear_user_proc(void *buf, int size); extern int copy_to_user_proc(void *to, void *from, int size); extern int copy_from_user_proc(void *to, void *from, int size); +extern int strlen_user_proc(char *str); extern void bus_handler(int sig, union uml_pt_regs *regs); +extern void winch(int sig, union uml_pt_regs *regs); extern long execute_syscall(void *r); extern int smp_sigio_handler(void); extern void *get_current(void); @@ -112,6 +111,8 @@ extern void arch_switch(void); extern void free_irq(unsigned int, void *); extern int um_in_interrupt(void); extern int cpu(void); +extern unsigned long long time_stamp(void); + #endif /* --- linux-2.6.2-rc1/arch/um/include/line.h 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/um/include/line.h 2004-01-21 23:30:29.000000000 -0800 @@ -9,12 +9,14 @@ #include "linux/list.h" #include "linux/workqueue.h" #include "linux/tty.h" +#include "linux/interrupt.h" #include "asm/semaphore.h" #include "chan_user.h" #include "mconsole_kern.h" struct line_driver { char *name; + char *device_name; char *devfs_name; short major; short minor_start; @@ -67,8 +69,6 @@ struct lines { #define LINES_INIT(n) { num : n } -extern void line_interrupt(int irq, void *data, struct pt_regs *unused); -extern void line_write_interrupt(int irq, void *data, struct pt_regs *unused); extern void line_close(struct line *lines, struct tty_struct *tty); extern int line_open(struct line *lines, struct tty_struct *tty, struct chan_opts *opts); --- linux-2.6.2-rc1/arch/um/include/mconsole.h 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/um/include/mconsole.h 2004-01-21 23:30:29.000000000 -0800 @@ -41,11 +41,13 @@ struct mconsole_notify { struct mc_request; +enum mc_context { MCONSOLE_INTR, MCONSOLE_PROC }; + struct mconsole_command { char *command; void (*handler)(struct mc_request *req); - int as_interrupt; + enum mc_context context; }; struct mc_request @@ -77,6 +79,8 @@ extern void mconsole_sysrq(struct mc_req extern void mconsole_cad(struct mc_request *req); extern void mconsole_stop(struct mc_request *req); extern void mconsole_go(struct mc_request *req); +extern void mconsole_log(struct mc_request *req); +extern void mconsole_proc(struct mc_request *req); extern int mconsole_get_request(int fd, struct mc_request *req); extern int mconsole_notify(char *sock_name, int type, const void *data, --- linux-2.6.2-rc1/arch/um/include/mem.h 2003-06-14 12:18:51.000000000 -0700 +++ 25/arch/um/include/mem.h 2004-01-21 23:30:29.000000000 -0800 @@ -1,19 +1,17 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002, 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ #ifndef __MEM_H__ #define __MEM_H__ -struct vm_reserved { - struct list_head list; - unsigned long start; - unsigned long end; -}; +#include "linux/types.h" -extern void set_usable_vm(unsigned long start, unsigned long end); -extern void set_kmem_end(unsigned long new); +extern int phys_mapping(unsigned long phys, __u64 *offset_out); +extern int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w); +extern int is_remapped(void *virt); +extern int physmem_remove_mapping(void *virt); #endif --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/include/mem_kern.h 2004-01-21 23:30:29.000000000 -0800 @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __MEM_KERN_H__ +#define __MEM_KERN_H__ + +#include "linux/list.h" +#include "linux/types.h" + +struct remapper { + struct list_head list; + int (*proc)(int, unsigned long, int, __u64); +}; + +extern void register_remapper(struct remapper *info); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.2-rc1/arch/um/include/mem_user.h 2003-06-14 12:18:25.000000000 -0700 +++ 25/arch/um/include/mem_user.h 2004-01-21 23:30:29.000000000 -0800 @@ -32,43 +32,38 @@ #ifndef _MEM_USER_H #define _MEM_USER_H -struct mem_region { +struct iomem_region { + struct iomem_region *next; char *driver; - unsigned long start_pfn; - unsigned long start; - unsigned long len; - void *mem_map; int fd; + int size; + unsigned long phys; + unsigned long virt; }; -extern struct mem_region *regions[]; -extern struct mem_region physmem_region; +extern struct iomem_region *iomem_regions; +extern int iomem_size; #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) extern unsigned long host_task_size; extern unsigned long task_size; +extern void check_devanon(void); extern int init_mem_user(void); extern int create_mem_file(unsigned long len); -extern void setup_range(int fd, char *driver, unsigned long start, - unsigned long pfn, unsigned long total, int need_vm, - struct mem_region *region, void *reserved); extern void setup_memory(void *entry); extern unsigned long find_iomem(char *driver, unsigned long *len_out); -extern int init_maps(struct mem_region *region); -extern int nregions(void); -extern int reserve_vm(unsigned long start, unsigned long end, void *e); +extern int init_maps(unsigned long physmem, unsigned long iomem, + unsigned long highmem); extern unsigned long get_vm(unsigned long len); extern void setup_physmem(unsigned long start, unsigned long usable, - unsigned long len); -extern int setup_region(struct mem_region *region, void *entry); + unsigned long len, unsigned long highmem); extern void add_iomem(char *name, int fd, unsigned long size); -extern struct mem_region *phys_region(unsigned long phys); extern unsigned long phys_offset(unsigned long phys); extern void unmap_physmem(void); -extern int map_memory(unsigned long virt, unsigned long phys, - unsigned long len, int r, int w, int x); +extern void map_memory(unsigned long virt, unsigned long phys, + unsigned long len, int r, int w, int x); extern int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x, int must_succeed); extern unsigned long get_kmem_end(void); --- linux-2.6.2-rc1/arch/um/include/os.h 2003-06-14 12:18:02.000000000 -0700 +++ 25/arch/um/include/os.h 2004-01-21 23:30:29.000000000 -0800 @@ -17,6 +17,32 @@ #define OS_TYPE_FIFO 6 #define OS_TYPE_SOCK 7 +/* os_access() flags */ +#define OS_ACC_F_OK 0 /* Test for existence. */ +#define OS_ACC_X_OK 1 /* Test for execute permission. */ +#define OS_ACC_W_OK 2 /* Test for write permission. */ +#define OS_ACC_R_OK 4 /* Test for read permission. */ +#define OS_ACC_RW_OK (OS_ACC_W_OK | OS_ACC_R_OK) /* Test for RW permission */ + +/* + * types taken from stat_file() in hostfs_user.c + * (if they are wrong here, they are wrong there...). + */ +struct uml_stat { + int ust_dev; /* device */ + unsigned long long ust_ino; /* inode */ + int ust_mode; /* protection */ + int ust_nlink; /* number of hard links */ + int ust_uid; /* user ID of owner */ + int ust_gid; /* group ID of owner */ + unsigned long long ust_size; /* total size, in bytes */ + int ust_blksize; /* blocksize for filesystem I/O */ + unsigned long long ust_blocks; /* number of blocks allocated */ + unsigned long ust_atime; /* time of last access */ + unsigned long ust_mtime; /* time of last modification */ + unsigned long ust_ctime; /* time of last change */ +}; + struct openflags { unsigned int r : 1; unsigned int w : 1; @@ -84,29 +110,47 @@ static inline struct openflags of_excl(s flags.e = 1; return(flags); } - + static inline struct openflags of_cloexec(struct openflags flags) { flags.cl = 1; return(flags); } +extern int os_stat_file(const char *file_name, struct uml_stat *buf); +extern int os_stat_fd(const int fd, struct uml_stat *buf); +extern int os_access(const char *file, int mode); +extern void os_print_error(int error, const char* str); +extern int os_get_exec_close(int fd, int *close_on_exec); +extern int os_set_exec_close(int fd, int close_on_exec); +extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg); +extern int os_window_size(int fd, int *rows, int *cols); +extern int os_new_tty_pgrp(int fd, int pid); +extern int os_get_ifname(int fd, char *namebuf); +extern int os_set_slip(int fd); +extern int os_set_owner(int fd, int pid); +extern int os_sigio_async(int master, int slave); +extern int os_mode_fd(int fd, int mode); + extern int os_seek_file(int fd, __u64 offset); extern int os_open_file(char *file, struct openflags flags, int mode); extern int os_read_file(int fd, void *buf, int len); -extern int os_write_file(int fd, void *buf, int count); +extern int os_write_file(int fd, const void *buf, int count); extern int os_file_size(char *file, long long *size_out); +extern int os_file_modtime(char *file, unsigned long *modtime); extern int os_pipe(int *fd, int stream, int close_on_exec); extern int os_set_fd_async(int fd, int owner); extern int os_set_fd_block(int fd, int blocking); extern int os_accept_connection(int fd); +extern int os_create_unix_socket(char *file, int len, int close_on_exec); extern int os_shutdown_socket(int fd, int r, int w); extern void os_close_file(int fd); extern int os_rcv_fd(int fd, int *helper_pid_out); -extern int create_unix_socket(char *file, int len); +extern int create_unix_socket(char *file, int len, int close_on_exec); extern int os_connect_socket(char *name); extern int os_file_type(char *file); extern int os_file_mode(char *file, struct openflags *mode_out); +extern int os_lock_file(int fd, int excl); extern unsigned long os_process_pc(int pid); extern int os_process_parent(int pid); @@ -115,11 +159,12 @@ extern void os_kill_process(int pid, int extern void os_usr1_process(int pid); extern int os_getpid(void); -extern int os_map_memory(void *virt, int fd, unsigned long off, +extern int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x); extern int os_protect_memory(void *addr, unsigned long len, int r, int w, int x); extern int os_unmap_memory(void *addr, int len); +extern void os_flush_stdout(void); #endif --- linux-2.6.2-rc1/arch/um/include/skas_ptrace.h 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/um/include/skas_ptrace.h 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ --- linux-2.6.2-rc1/arch/um/include/sysdep-i386/frame_user.h 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/um/include/sysdep-i386/frame_user.h 2004-01-21 23:30:29.000000000 -0800 @@ -56,26 +56,26 @@ static inline void setup_arch_frame(stru * it would have to be __builtin_frame_address(1). */ -static inline unsigned long frame_restorer(void) -{ - unsigned long *fp; - - fp = __builtin_frame_address(0); - return((unsigned long) (fp + 1)); -} +#define frame_restorer() \ +({ \ + unsigned long *fp; \ +\ + fp = __builtin_frame_address(0); \ + ((unsigned long) (fp + 1)); \ +}) /* Similarly, this returns the value of sp when the handler was first * entered. This is used to calculate the proper sp when delivering * signals. */ -static inline unsigned long frame_sp(void) -{ - unsigned long *fp; - - fp = __builtin_frame_address(0); - return((unsigned long) (fp + 1)); -} +#define frame_sp() \ +({ \ + unsigned long *fp; \ +\ + fp = __builtin_frame_address(0); \ + ((unsigned long) (fp + 1)); \ +}) #endif --- linux-2.6.2-rc1/arch/um/include/sysdep-i386/sigcontext.h 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/include/sysdep-i386/sigcontext.h 2004-01-21 23:30:29.000000000 -0800 @@ -28,8 +28,8 @@ */ #define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0) -/* These are General Protection and Page Fault */ -#define SEGV_IS_FIXABLE(trap) ((trap == 13) || (trap == 14)) +/* This is Page Fault */ +#define SEGV_IS_FIXABLE(trap) (trap == 14) #define SC_SEGV_IS_FIXABLE(sc) (SEGV_IS_FIXABLE(SC_TRAPNO(sc))) --- linux-2.6.2-rc1/arch/um/include/ubd_user.h 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/um/include/ubd_user.h 2004-01-21 23:30:29.000000000 -0800 @@ -9,7 +9,7 @@ #include "os.h" -enum ubd_req { UBD_READ, UBD_WRITE }; +enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP }; struct io_thread_req { enum ubd_req op; @@ -20,8 +20,10 @@ struct io_thread_req { char *buffer; int sectorsize; unsigned long sector_mask; - unsigned long cow_offset; + unsigned long long cow_offset; unsigned long bitmap_words[2]; + int map_fd; + unsigned long long map_offset; int error; }; @@ -31,7 +33,7 @@ extern int open_ubd_file(char *file, str int *create_cow_out); extern int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, int sectorsize, - int *bitmap_offset_out, + int alignment, int *bitmap_offset_out, unsigned long *bitmap_len_out, int *data_offset_out); extern int read_cow_bitmap(int fd, void *buf, int offset, int len); @@ -39,7 +41,6 @@ extern int read_ubd_fs(int fd, void *buf extern int write_ubd_fs(int fd, char *buffer, int len); extern int start_io_thread(unsigned long sp, int *fds_out); extern void do_io(struct io_thread_req *req); -extern int ubd_is_dir(char *file); static inline int ubd_test_bit(__u64 bit, unsigned char *data) { --- linux-2.6.2-rc1/arch/um/include/um_uaccess.h 2003-06-14 12:18:25.000000000 -0700 +++ 25/arch/um/include/um_uaccess.h 2004-01-21 23:30:29.000000000 -0800 @@ -38,22 +38,73 @@ static inline int copy_to_user(void *to, from, n)); } +/* + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ + static inline int strncpy_from_user(char *dst, const char *src, int count) { return(CHOOSE_MODE_PROC(strncpy_from_user_tt, strncpy_from_user_skas, dst, src, count)); } +/* + * __clear_user: - Zero a block of memory in user space, with less checking. + * @to: Destination address, in user space. + * @n: Number of bytes to zero. + * + * Zero a block of memory in user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ static inline int __clear_user(void *mem, int len) { return(CHOOSE_MODE_PROC(__clear_user_tt, __clear_user_skas, mem, len)); } +/* + * clear_user: - Zero a block of memory in user space. + * @to: Destination address, in user space. + * @n: Number of bytes to zero. + * + * Zero a block of memory in user space. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ static inline int clear_user(void *mem, int len) { return(CHOOSE_MODE_PROC(clear_user_tt, clear_user_skas, mem, len)); } +/* + * strlen_user: - Get the size of a string in user space. + * @str: The string to measure. + * @n: The maximum valid length + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * On exception, returns 0. + * If the string is too long, returns a value greater than @n. + */ static inline int strnlen_user(const void *str, int len) { return(CHOOSE_MODE_PROC(strnlen_user_tt, strnlen_user_skas, str, len)); --- linux-2.6.2-rc1/arch/um/include/user.h 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/include/user.h 2004-01-21 23:30:29.000000000 -0800 @@ -14,6 +14,7 @@ extern void *um_kmalloc_atomic(int size) extern void kfree(void *ptr); extern int in_aton(char *str); extern int open_gdb_chan(void); +extern int strlcpy(char *, const char *, int); #endif --- linux-2.6.2-rc1/arch/um/include/user_util.h 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/um/include/user_util.h 2004-01-21 23:30:29.000000000 -0800 @@ -14,8 +14,6 @@ extern int grantpt(int __fd); extern int unlockpt(int __fd); extern char *ptsname(int __fd); -enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; - struct cpu_task { int pid; void *task; @@ -59,7 +57,6 @@ extern int wait_for_stop(int pid, int si extern void *add_signal_handler(int sig, void (*handler)(int)); extern int start_fork_tramp(void *arg, unsigned long temp_stack, int clone_flags, int (*tramp)(void *)); -extern int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags); extern int linux_main(int argc, char **argv); extern void set_cmdline(char *cmd); extern void input_cb(void (*proc)(void *), void *arg, int arg_len); @@ -86,11 +83,13 @@ extern void check_sigio(void); extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr); extern void write_sigio_workaround(void); extern void arch_check_bugs(void); +extern int cpu_feature(char *what, char *buf, int len); extern int arch_handle_signal(int sig, union uml_pt_regs *regs); extern int arch_fixup(unsigned long address, void *sc_ptr); extern void forward_pending_sigio(int target); extern int can_do_skas(void); - +extern void arch_init_thread(void); + #endif /* --- linux-2.6.2-rc1/arch/um/Kconfig 2004-01-20 21:51:18.000000000 -0800 +++ 25/arch/um/Kconfig 2004-01-21 23:30:29.000000000 -0800 @@ -61,6 +61,20 @@ config MODE_SKAS config NET bool "Networking support" + help + Unless you really know what you are doing, you should say Y here. + The reason is that some programs need kernel networking support even + when running on a stand-alone machine that isn't connected to any + other computer. If you are upgrading from an older kernel, you + should consider updating your networking tools too because changes + in the kernel and the tools often go hand in hand. The tools are + contained in the package net-tools, the location and version number + of which are given in Documentation/Changes. + + For a general introduction to Linux networking, it is highly + recommended to read the NET-HOWTO, available from + . + source "fs/Kconfig.binfmt" @@ -85,6 +99,19 @@ config HOSTFS If you'd like to be able to work with files stored on the host, say Y or M here; otherwise say N. +config HPPFS + tristate "HoneyPot ProcFS" + help + hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc + entries to be overridden, removed, or fabricated from the host. + Its purpose is to allow a UML to appear to be a physical machine + by removing or changing anything in /proc which gives away the + identity of a UML. + + See http://user-mode-linux.sf.net/hppfs.html for more information. + + You only need this if you are setting up a UML honeypot. Otherwise, + it is safe to say 'N' here. config MCONSOLE bool "Management console" @@ -105,6 +132,16 @@ config MCONSOLE config MAGIC_SYSRQ bool "Magic SysRq key" depends on MCONSOLE + help + If you say Y here, you will have some control over the system even + if the system crashes for example during kernel debugging (e.g., you + will be able to flush the buffer cache to disk, reboot the system + immediately or dump some status information). This is accomplished + by pressing various keys while holding SysRq (Alt+PrintScreen). It + also works on a serial console (on PC hardware at least), if you + send a BREAK and then within 5 seconds a command keypress. The + keys are documented in Documentation/sysrq.txt. Don't say Y + unless you really know what this hack does. config HOST_2G_2G bool "2G/2G host address space split" @@ -160,6 +197,9 @@ config KERNEL_HALF_GIGS config HIGHMEM bool "Highmem support" +config PROC_MM + bool "/proc/mm support" + config KERNEL_STACK_ORDER int "Kernel stack size order" default 2 @@ -168,6 +208,17 @@ config KERNEL_STACK_ORDER be 1 << order pages. The default is OK unless you're running Valgrind on UML, in which case, set this to 3. +config UML_REAL_TIME_CLOCK + bool "Real-time Clock" + default y + help + This option makes UML time deltas match wall clock deltas. This should + normally be enabled. The exception would be if you are debugging with + UML and spend long times with UML stopped at a breakpoint. In this + case, when UML is restarted, it will call the timer enough times to make + up for the time spent at the breakpoint. This could result in a + noticable lag. If this is a problem, then disable this option. + endmenu source "init/Kconfig" @@ -240,6 +291,10 @@ config FRAME_POINTER config PT_PROXY bool "Enable ptrace proxy" depends on XTERM_CHAN && DEBUG_INFO + help + This option enables a debugging interface which allows gdb to debug + the kernel without needing to actually attach to kernel threads. + If you want to do kernel debugging, say Y here; otherwise say N. config GPROF bool "Enable gprof support" --- linux-2.6.2-rc1/arch/um/Kconfig_block 2003-06-14 12:18:25.000000000 -0700 +++ 25/arch/um/Kconfig_block 2004-01-21 23:30:29.000000000 -0800 @@ -29,6 +29,20 @@ config BLK_DEV_UBD_SYNC wise choice too. In all other cases (for example, if you're just playing around with User-Mode Linux) you can choose N. +# Turn this back on when the driver actually works +# +#config BLK_DEV_COW +# tristate "COW block device" +# help +# This is a layered driver which sits above two other block devices. +# One is read-only, and the other is a read-write layer which stores +# all changes. This provides the illusion that the read-only layer +# can be mounted read-write and changed. + +config BLK_DEV_COW_COMMON + bool + default BLK_DEV_COW || BLK_DEV_UBD + config BLK_DEV_LOOP tristate "Loopback device support" --- linux-2.6.2-rc1/arch/um/Kconfig_net 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/um/Kconfig_net 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,5 @@ -menu "Network Devices" +menu "UML Network Devices" depends on NET # UML virtual driver @@ -176,73 +176,5 @@ config UML_NET_SLIRP Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" - -# Below are hardware-independent drivers mirrored from -# drivers/net/Config.in. It would be nice if Linux -# had HW independent drivers separated from the other -# but it does not. Until then each non-ISA/PCI arch -# needs to provide it's own menu of network drivers -config DUMMY - tristate "Dummy net driver support" - -config BONDING - tristate "Bonding driver support" - -config EQUALIZER - tristate "EQL (serial line load balancing) support" - -config TUN - tristate "Universal TUN/TAP device driver support" - -config ETHERTAP - tristate "Ethertap network tap (OBSOLETE)" - depends on EXPERIMENTAL && NETLINK - -config PPP - tristate "PPP (point-to-point protocol) support" - -config PPP_MULTILINK - bool "PPP multilink support (EXPERIMENTAL)" - depends on PPP && EXPERIMENTAL - -config PPP_FILTER - bool "PPP filtering" - depends on PPP && FILTER - -config PPP_ASYNC - tristate "PPP support for async serial ports" - depends on PPP - -config PPP_SYNC_TTY - tristate "PPP support for sync tty ports" - depends on PPP - -config PPP_DEFLATE - tristate "PPP Deflate compression" - depends on PPP - -config PPP_BSDCOMP - tristate "PPP BSD-Compress compression" - depends on PPP - -config PPPOE - tristate "PPP over Ethernet (EXPERIMENTAL)" - depends on PPP && EXPERIMENTAL - -config SLIP - tristate "SLIP (serial line) support" - -config SLIP_COMPRESSED - bool "CSLIP compressed headers" - depends on SLIP=y - -config SLIP_SMART - bool "Keepalive and linefill" - depends on SLIP=y - -config SLIP_MODE_SLIP6 - bool "Six bit SLIP encapsulation" - depends on SLIP=y - endmenu --- linux-2.6.2-rc1/arch/um/kernel/config.c.in 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/kernel/config.c.in 2004-01-21 23:30:29.000000000 -0800 @@ -7,9 +7,7 @@ #include #include "init.h" -static __initdata char *config = " -CONFIG -"; +static __initdata char *config = "CONFIG"; static int __init print_config(char *line, int *add) { --- linux-2.6.2-rc1/arch/um/kernel/exec_kern.c 2003-06-14 12:18:05.000000000 -0700 +++ 25/arch/um/kernel/exec_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -32,10 +32,15 @@ void start_thread(struct pt_regs *regs, CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp); } +extern void log_exec(char **argv, void *tty); + static int execve1(char *file, char **argv, char **env) { int error; +#ifdef CONFIG_TTY_LOG + log_exec(argv, current->tty); +#endif error = do_execve(file, argv, env, ¤t->thread.regs); if (error == 0){ current->ptrace &= ~PT_DTRACE; --- linux-2.6.2-rc1/arch/um/kernel/frame.c 2003-06-14 12:18:07.000000000 -0700 +++ 25/arch/um/kernel/frame.c 2004-01-21 23:30:29.000000000 -0800 @@ -279,7 +279,7 @@ void capture_signal_stack(void) struct sc_frame_raw raw_sc; struct si_frame_raw raw_si; void *stack, *sigstack; - unsigned long top, sig_top, base; + unsigned long top, base; stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); @@ -292,7 +292,6 @@ void capture_signal_stack(void) } top = (unsigned long) stack + PAGE_SIZE - sizeof(void *); - sig_top = (unsigned long) sigstack + PAGE_SIZE; /* Get the sigcontext, no sigrestorer layout */ raw_sc.restorer = 0; --- linux-2.6.2-rc1/arch/um/kernel/frame_kern.c 2003-06-14 12:18:52.000000000 -0700 +++ 25/arch/um/kernel/frame_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -6,7 +6,6 @@ #include "asm/ptrace.h" #include "asm/uaccess.h" #include "asm/signal.h" -#include "asm/uaccess.h" #include "asm/ucontext.h" #include "frame_kern.h" #include "sigcontext.h" @@ -29,12 +28,15 @@ static int copy_restorer(void (*restorer sizeof(restorer))); } +extern int userspace_pid[]; + static int copy_sc_to_user(void *to, void *fp, struct pt_regs *from, struct arch_frame_data *arch) { return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs), arch), - copy_sc_to_user_skas(to, fp, &from->regs, + copy_sc_to_user_skas(userspace_pid[0], to, fp, + &from->regs, current->thread.cr2, current->thread.err))); } --- linux-2.6.2-rc1/arch/um/kernel/helper.c 2003-06-14 12:18:20.000000000 -0700 +++ 25/arch/um/kernel/helper.c 2004-01-21 23:30:29.000000000 -0800 @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -33,6 +32,7 @@ static int helper_child(void *arg) { struct helper_data *data = arg; char **argv = data->argv; + int errval; if(helper_pause){ signal(SIGHUP, helper_hup); @@ -41,8 +41,9 @@ static int helper_child(void *arg) if(data->pre_exec != NULL) (*data->pre_exec)(data->pre_data); execvp(argv[0], argv); + errval = errno; printk("execvp of '%s' failed - errno = %d\n", argv[0], errno); - write(data->fd, &errno, sizeof(errno)); + os_write_file(data->fd, &errval, sizeof(errval)); os_kill_process(os_getpid(), 0); return(0); } @@ -59,17 +60,20 @@ int run_helper(void (*pre_exec)(void *), if((stack_out != NULL) && (*stack_out != 0)) stack = *stack_out; else stack = alloc_stack(0, um_in_interrupt()); - if(stack == 0) return(-ENOMEM); + if(stack == 0) + return(-ENOMEM); err = os_pipe(fds, 1, 0); - if(err){ - printk("run_helper : pipe failed, errno = %d\n", -err); - return(err); + if(err < 0){ + printk("run_helper : pipe failed, err = %d\n", -err); + goto out_free; } - if(fcntl(fds[1], F_SETFD, 1) != 0){ - printk("run_helper : setting FD_CLOEXEC failed, errno = %d\n", - errno); - return(-errno); + + err = os_set_exec_close(fds[1], 1); + if(err < 0){ + printk("run_helper : setting FD_CLOEXEC failed, err = %d\n", + -err); + goto out_close; } sp = stack + page_size() - sizeof(void *); @@ -80,23 +84,34 @@ int run_helper(void (*pre_exec)(void *), pid = clone(helper_child, (void *) sp, CLONE_VM | SIGCHLD, &data); if(pid < 0){ printk("run_helper : clone failed, errno = %d\n", errno); - return(-errno); + err = -errno; + goto out_close; } - close(fds[1]); - n = read(fds[0], &err, sizeof(err)); + + os_close_file(fds[1]); + n = os_read_file(fds[0], &err, sizeof(err)); if(n < 0){ - printk("run_helper : read on pipe failed, errno = %d\n", - errno); - return(-errno); + printk("run_helper : read on pipe failed, err = %d\n", -n); + err = n; + goto out_kill; } else if(n != 0){ waitpid(pid, NULL, 0); - pid = -err; + pid = -errno; } if(stack_out == NULL) free_stack(stack, 0); else *stack_out = stack; return(pid); + + out_kill: + os_kill_process(pid, 1); + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + out_free: + free_stack(stack, 0); + return(err); } int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, @@ -117,9 +132,11 @@ int run_helper_thread(int (*proc)(void * } if(stack_out == NULL){ pid = waitpid(pid, &status, 0); - if(pid < 0) + if(pid < 0){ printk("run_helper_thread - wait failed, errno = %d\n", - pid); + errno); + pid = -errno; + } if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) printk("run_helper_thread - thread returned status " "0x%x\n", status); --- linux-2.6.2-rc1/arch/um/kernel/initrd_user.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/kernel/initrd_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -6,7 +6,6 @@ #include #include #include -#include #include #include "user_util.h" @@ -19,13 +18,15 @@ int load_initrd(char *filename, void *bu { int fd, n; - if((fd = os_open_file(filename, of_read(OPENFLAGS()), 0)) < 0){ - printk("Opening '%s' failed - errno = %d\n", filename, errno); + fd = os_open_file(filename, of_read(OPENFLAGS()), 0); + if(fd < 0){ + printk("Opening '%s' failed - err = %d\n", filename, -fd); return(-1); } - if((n = read(fd, buf, size)) != size){ - printk("Read of %d bytes from '%s' returned %d, errno = %d\n", - size, filename, n, errno); + n = os_read_file(fd, buf, size); + if(n != size){ + printk("Read of %d bytes from '%s' failed, err = %d\n", size, + filename, -n); return(-1); } return(0); --- linux-2.6.2-rc1/arch/um/kernel/init_task.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/init_task.c 2004-01-21 23:30:29.000000000 -0800 @@ -8,7 +8,6 @@ #include "linux/module.h" #include "linux/sched.h" #include "linux/init_task.h" -#include "linux/version.h" #include "asm/uaccess.h" #include "asm/pgtable.h" #include "user_util.h" @@ -18,7 +17,7 @@ static struct fs_struct init_fs = INIT_F struct mm_struct init_mm = INIT_MM(init_mm); static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); - +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); EXPORT_SYMBOL(init_mm); /* @@ -43,26 +42,12 @@ union thread_union init_thread_union __attribute__((__section__(".data.init_task"))) = { INIT_THREAD_INFO(init_task) }; -struct task_struct *alloc_task_struct(void) -{ - return((struct task_struct *) - __get_free_pages(GFP_KERNEL, CONFIG_KERNEL_STACK_ORDER)); -} - void unprotect_stack(unsigned long stack) { protect_memory(stack, (1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE, 1, 1, 0, 1); } -void free_task_struct(struct task_struct *task) -{ - /* free_pages decrements the page counter and only actually frees - * the pages if they are now not accessed by anything. - */ - free_pages((unsigned long) task, CONFIG_KERNEL_STACK_ORDER); -} - /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- linux-2.6.2-rc1/arch/um/kernel/irq.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/um/kernel/irq.c 2004-01-21 23:30:29.000000000 -0800 @@ -29,6 +29,7 @@ #include "user_util.h" #include "kern_util.h" #include "irq_user.h" +#include "irq_kern.h" static void register_irq_proc (unsigned int irq); @@ -83,65 +84,55 @@ struct hw_interrupt_type no_irq_type = { end_none }; -/* Not changed */ -volatile unsigned long irq_err_count; - /* * Generic, controller-independent functions: */ -int get_irq_list(char *buf) +int show_interrupts(struct seq_file *p, void *v) { - int i, j; - unsigned long flags; + int i = *(loff_t *) v, j; struct irqaction * action; - char *p = buf; + unsigned long flags; - p += sprintf(p, " "); - for (j=0; jtypename); - p += sprintf(p, " %s", action->name); + seq_printf(p, " %14s", irq_desc[i].handler->typename); + seq_printf(p, " %s", action->name); for (action=action->next; action; action = action->next) - p += sprintf(p, ", %s", action->name); - *p++ = '\n'; - end: + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); +skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } else if (i == NR_IRQS) { + seq_printf(p, "NMI: "); + for (j = 0; j < NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "%10u ", nmi_count(j)); + seq_putc(p, '\n'); } - p += sprintf(p, "\n"); -#ifdef notdef -#ifdef CONFIG_SMP - p += sprintf(p, "LOC: "); - for (j = 0; j < num_online_cpus(); j++) - p += sprintf(p, "%10u ", - apic_timer_irqs[cpu_logical_map(j)]); - p += sprintf(p, "\n"); -#endif -#endif - p += sprintf(p, "ERR: %10lu\n", irq_err_count); - return p - buf; -} - -int show_interrupts(struct seq_file *p, void *v) -{ - return(0); + return 0; } /* @@ -230,8 +221,11 @@ inline void synchronize_irq(unsigned int void disable_irq(unsigned int irq) { + irq_desc_t *desc = irq_desc + irq; + disable_irq_nosync(irq); - synchronize_irq(irq); + if(desc->action) + synchronize_irq(irq); } /** @@ -252,7 +246,7 @@ void enable_irq(unsigned int irq) spin_lock_irqsave(&desc->lock, flags); switch (desc->depth) { case 1: { - unsigned int status = desc->status & ~IRQ_DISABLED; + unsigned int status = desc->status & IRQ_DISABLED; desc->status = status; if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { desc->status = status | IRQ_REPLAY; @@ -282,13 +276,12 @@ unsigned int do_IRQ(int irq, union uml_p * 0 return value means that this irq is already being * handled by some other CPU. (or is disabled) */ - int cpu = smp_processor_id(); irq_desc_t *desc = irq_desc + irq; struct irqaction * action; unsigned int status; irq_enter(); - kstat_cpu(cpu).irqs[irq]++; + kstat_this_cpu.irqs[irq]++; spin_lock(&desc->lock); desc->handler->ack(irq); /* @@ -385,7 +378,7 @@ out: */ int request_irq(unsigned int irq, - void (*handler)(int, void *, struct pt_regs *), + irqreturn_t (*handler)(int, void *, struct pt_regs *), unsigned long irqflags, const char * devname, void *dev_id) @@ -433,15 +426,19 @@ int request_irq(unsigned int irq, EXPORT_SYMBOL(request_irq); int um_request_irq(unsigned int irq, int fd, int type, - void (*handler)(int, void *, struct pt_regs *), + irqreturn_t (*handler)(int, void *, struct pt_regs *), unsigned long irqflags, const char * devname, void *dev_id) { - int retval; + int err; - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if(retval) return(retval); - return(activate_fd(irq, fd, type, dev_id)); + err = request_irq(irq, handler, irqflags, devname, dev_id); + if(err) + return(err); + + if(fd != -1) + err = activate_fd(irq, fd, type, dev_id); + return(err); } /* this was setup_x86_irq but it seems pretty generic */ @@ -474,7 +471,8 @@ int setup_irq(unsigned int irq, struct i */ spin_lock_irqsave(&desc->lock,flags); p = &desc->action; - if ((old = *p) != NULL) { + old = *p; + if (old != NULL) { /* Can't share interrupts unless both agree to */ if (!(old->flags & new->flags & SA_SHIRQ)) { spin_unlock_irqrestore(&desc->lock,flags); @@ -586,12 +584,14 @@ static int irq_affinity_write_proc (stru unsigned long count, void *data) { int irq = (long) data, full_count = count, err; - cpumask_t new_value, tmp; + cpumask_t new_value; if (!irq_desc[irq].handler->set_affinity) return -EIO; err = cpumask_parse(buffer, count, new_value); + if(err) + return(err); #ifdef CONFIG_SMP /* @@ -614,6 +614,7 @@ static int prof_cpu_mask_read_proc (char int count, int *eof, void *data) { int len = cpumask_snprintf(page, count, *(cpumask_t *)data); + if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); --- linux-2.6.2-rc1/arch/um/kernel/irq_user.c 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/um/kernel/irq_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -49,7 +48,8 @@ void sigio_handler(int sig, union uml_pt if(smp_sigio_handler()) return; while(1){ - if((n = poll(pollfds, pollfds_num, 0)) < 0){ + n = poll(pollfds, pollfds_num, 0); + if(n < 0){ if(errno == EINTR) continue; printk("sigio_handler : poll returned %d, " "errno = %d\n", n, errno); @@ -366,34 +366,31 @@ void deactivate_fd(int fd, int irqnum) void forward_ipi(int fd, int pid) { - if(fcntl(fd, F_SETOWN, pid) < 0){ - int save_errno = errno; - if(fcntl(fd, F_GETOWN, 0) != pid){ - printk("forward_ipi: F_SETOWN failed, fd = %d, " - "me = %d, target = %d, errno = %d\n", fd, - os_getpid(), pid, save_errno); - } - } + int err; + + err = os_set_owner(fd, pid); + if(err < 0) + printk("forward_ipi: set_owner failed, fd = %d, me = %d, " + "target = %d, err = %d\n", fd, os_getpid(), pid, -err); } void forward_interrupts(int pid) { struct irq_fd *irq; unsigned long flags; + int err; flags = irq_lock(); for(irq=active_fds;irq != NULL;irq = irq->next){ - if(fcntl(irq->fd, F_SETOWN, pid) < 0){ - int save_errno = errno; - if(fcntl(irq->fd, F_GETOWN, 0) != pid){ - /* XXX Just remove the irq rather than - * print out an infinite stream of these - */ - printk("Failed to forward %d to pid %d, " - "errno = %d\n", irq->fd, pid, - save_errno); - } + err = os_set_owner(irq->fd, pid); + if(err < 0){ + /* XXX Just remove the irq rather than + * print out an infinite stream of these + */ + printk("Failed to forward %d to pid %d, err = %d\n", + irq->fd, pid, -err); } + irq->pid = pid; } irq_unlock(flags); --- linux-2.6.2-rc1/arch/um/kernel/ksyms.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/kernel/ksyms.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -34,34 +34,62 @@ EXPORT_SYMBOL(task_size); EXPORT_SYMBOL(flush_tlb_range); EXPORT_SYMBOL(host_task_size); EXPORT_SYMBOL(arch_validate); +EXPORT_SYMBOL(get_kmem_end); -EXPORT_SYMBOL(region_pa); -EXPORT_SYMBOL(region_va); -EXPORT_SYMBOL(phys_mem_map); -EXPORT_SYMBOL(page_mem_map); EXPORT_SYMBOL(page_to_phys); EXPORT_SYMBOL(phys_to_page); EXPORT_SYMBOL(high_physmem); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(um_virt_to_phys); +EXPORT_SYMBOL(__virt_to_page); +EXPORT_SYMBOL(to_phys); +EXPORT_SYMBOL(to_virt); EXPORT_SYMBOL(mode_tt); EXPORT_SYMBOL(handle_page_fault); +#ifdef CONFIG_MODE_TT +EXPORT_SYMBOL(copy_from_user_tt); +EXPORT_SYMBOL(copy_to_user_tt); +#endif + +#ifdef CONFIG_MODE_SKAS +EXPORT_SYMBOL(copy_to_user_skas); +EXPORT_SYMBOL(copy_from_user_skas); +#endif + +EXPORT_SYMBOL(os_stat_fd); +EXPORT_SYMBOL(os_stat_file); +EXPORT_SYMBOL(os_access); +EXPORT_SYMBOL(os_print_error); +EXPORT_SYMBOL(os_get_exec_close); +EXPORT_SYMBOL(os_set_exec_close); EXPORT_SYMBOL(os_getpid); EXPORT_SYMBOL(os_open_file); EXPORT_SYMBOL(os_read_file); EXPORT_SYMBOL(os_write_file); EXPORT_SYMBOL(os_seek_file); +EXPORT_SYMBOL(os_lock_file); EXPORT_SYMBOL(os_pipe); EXPORT_SYMBOL(os_file_type); +EXPORT_SYMBOL(os_file_mode); +EXPORT_SYMBOL(os_file_size); +EXPORT_SYMBOL(os_flush_stdout); EXPORT_SYMBOL(os_close_file); +EXPORT_SYMBOL(os_set_fd_async); +EXPORT_SYMBOL(os_set_fd_block); EXPORT_SYMBOL(helper_wait); EXPORT_SYMBOL(os_shutdown_socket); +EXPORT_SYMBOL(os_create_unix_socket); EXPORT_SYMBOL(os_connect_socket); +EXPORT_SYMBOL(os_accept_connection); +EXPORT_SYMBOL(os_rcv_fd); EXPORT_SYMBOL(run_helper); EXPORT_SYMBOL(start_thread); EXPORT_SYMBOL(dump_thread); +EXPORT_SYMBOL(do_gettimeofday); +EXPORT_SYMBOL(do_settimeofday); + /* This is here because UML expands open to sys_open, not to a system * call instruction. */ @@ -90,3 +118,13 @@ EXPORT_SYMBOL(kunmap_atomic); EXPORT_SYMBOL(kmap_atomic_to_page); #endif +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.2-rc1/arch/um/kernel/Makefile 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/um/kernel/Makefile 2004-01-21 23:30:29.000000000 -0800 @@ -7,11 +7,11 @@ extra-y := vmlinux.lds.s obj-y = checksum.o config.o exec_kern.o exitcode.o frame_kern.o frame.o \ helper.o init_task.o irq.o irq_user.o ksyms.o mem.o mem_user.o \ - process.o process_kern.o ptrace.o reboot.o resource.o sigio_user.o \ - sigio_kern.o signal_kern.o signal_user.o smp.o syscall_kern.o \ - syscall_user.o sysrq.o sys_call_table.o tempfile.o time.o \ - time_kern.o tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o \ - umid.o user_syms.o user_util.o + physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \ + sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \ + syscall_kern.o syscall_user.o sysrq.o sys_call_table.o tempfile.o \ + time.o time_kern.o tlb.o trap_kern.o trap_user.o uaccess_user.o \ + um_arch.o umid.o user_syms.o user_util.o obj-$(CONFIG_BLK_DEV_INITRD) += initrd_kern.o initrd_user.o obj-$(CONFIG_GPROF) += gprof_syms.o @@ -21,6 +21,8 @@ obj-$(CONFIG_TTY_LOG) += tty_log.o obj-$(CONFIG_MODE_TT) += tt/ obj-$(CONFIG_MODE_SKAS) += skas/ +clean-files := config.c + user-objs-$(CONFIG_TTY_LOG) += tty_log.o USER_OBJS := $(filter %_user.o,$(obj-y)) $(user-objs-y) config.o helper.o \ @@ -45,17 +47,13 @@ $(USER_OBJS) : %.o: %.c $(obj)/frame.o: $(src)/frame.c $(CC) $(CFLAGS_$(notdir $@)) -c -o $@ $< -QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; while() { $$_ =~ s/CONFIG/$$config/; print $$_ }' +QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; $$config =~ s/\n/\\n"\n"/g ; while() { $$_ =~ s/CONFIG/$$config/; print $$_ }' $(obj)/config.c : $(src)/config.c.in $(TOPDIR)/.config $(PERL) -e $(QUOTE) < $(src)/config.c.in > $@ $(obj)/config.o : $(obj)/config.c -clean: - rm -f config.c - for dir in $(subdir-y) ; do $(MAKE) -C $$dir clean; done - modules: fastdep: --- linux-2.6.2-rc1/arch/um/kernel/mem.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/mem.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,74 +1,67 @@ /* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ -#include "linux/config.h" -#include "linux/module.h" -#include "linux/types.h" +#include "linux/stddef.h" +#include "linux/kernel.h" #include "linux/mm.h" -#include "linux/fs.h" -#include "linux/init.h" #include "linux/bootmem.h" #include "linux/swap.h" -#include "linux/slab.h" -#include "linux/vmalloc.h" #include "linux/highmem.h" +#include "linux/gfp.h" #include "asm/page.h" -#include "asm/pgtable.h" +#include "asm/fixmap.h" #include "asm/pgalloc.h" -#include "asm/bitops.h" -#include "asm/uaccess.h" -#include "asm/tlb.h" #include "user_util.h" #include "kern_util.h" -#include "mem_user.h" -#include "mem.h" #include "kern.h" -#include "init.h" -#include "os.h" -#include "mode_kern.h" +#include "mem_user.h" #include "uml_uaccess.h" +#include "os.h" + +extern char __binary_start; /* Changed during early boot */ -pgd_t swapper_pg_dir[1024]; -unsigned long high_physmem; -unsigned long vm_start; -unsigned long vm_end; -unsigned long highmem; unsigned long *empty_zero_page = NULL; unsigned long *empty_bad_page = NULL; - -/* Not modified */ -const char bad_pmd_string[] = "Bad pmd in pte_alloc: %08lx\n"; - -extern char __init_begin, __init_end; -extern long physmem_size; - -/* Not changed by UML */ -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - -/* Changed during early boot */ +pgd_t swapper_pg_dir[1024]; +unsigned long highmem; int kmalloc_ok = 0; -#define NREGIONS (phys_region_index(0xffffffff) - phys_region_index(0x0) + 1) -struct mem_region *regions[NREGIONS] = { [ 0 ... NREGIONS - 1 ] = NULL }; -#define REGION_SIZE ((0xffffffff & ~REGION_MASK) + 1) - -/* Changed during early boot */ static unsigned long brk_end; +static unsigned long totalram_pages = 0; + +void unmap_physmem(void) +{ + os_unmap_memory((void *) brk_end, uml_reserved - brk_end); +} static void map_cb(void *unused) { map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); } -void unmap_physmem(void) +#ifdef CONFIG_HIGHMEM +static void setup_highmem(unsigned long highmem_start, + unsigned long highmem_len) { - os_unmap_memory((void *) brk_end, uml_reserved - brk_end); -} + struct page *page; + unsigned long highmem_pfn; + int i; -extern char __binary_start; + highmem_start_page = virt_to_page(highmem_start); + + highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT; + for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){ + page = &mem_map[highmem_pfn + i]; + ClearPageReserved(page); + set_bit(PG_highmem, &page->flags); + atomic_set(&page->count, 1); + __free_page(page); + } +} +#endif void mem_init(void) { @@ -103,50 +96,15 @@ void mem_init(void) totalhigh_pages = highmem >> PAGE_SHIFT; totalram_pages += totalhigh_pages; num_physpages = totalram_pages; - max_mapnr = totalram_pages; max_pfn = totalram_pages; printk(KERN_INFO "Memory: %luk available\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10)); kmalloc_ok = 1; -} - -/* Changed during early boot */ -static unsigned long kmem_top = 0; - -unsigned long get_kmem_end(void) -{ - if(kmem_top == 0) - kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); - return(kmem_top); -} - -void set_kmem_end(unsigned long new) -{ - kmem_top = new; -} #ifdef CONFIG_HIGHMEM -/* Changed during early boot */ -pte_t *kmap_pte; -pgprot_t kmap_prot; - -EXPORT_SYMBOL(kmap_prot); -EXPORT_SYMBOL(kmap_pte); - -#define kmap_get_fixmap_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) - -void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; + setup_highmem(end_iomem, highmem); +#endif } -#endif /* CONFIG_HIGHMEM */ static void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) @@ -178,76 +136,24 @@ static void __init fixrange_init(unsigne } } -int init_maps(struct mem_region *region) -{ - struct page *p, *map; - int i, n, len; - - if(region == &physmem_region){ - region->mem_map = mem_map; - return(0); - } - else if(region->mem_map != NULL) return(0); - - n = region->len >> PAGE_SHIFT; - len = n * sizeof(struct page); - if(kmalloc_ok){ - map = kmalloc(len, GFP_KERNEL); - if(map == NULL) map = vmalloc(len); - } - else map = alloc_bootmem_low_pages(len); - - if(map == NULL) - return(-ENOMEM); - for(i = 0; i < n; i++){ - p = &map[i]; - set_page_count(p, 0); - SetPageReserved(p); - INIT_LIST_HEAD(&p->list); - } - region->mem_map = map; - return(0); -} +#if CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; -DECLARE_MUTEX(regions_sem); +#define kmap_get_fixmap_pte(vaddr) \ + pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) -static int setup_one_range(int fd, char *driver, unsigned long start, - unsigned long pfn, int len, - struct mem_region *region) +void __init kmap_init(void) { - int i; - - down(®ions_sem); - for(i = 0; i < NREGIONS; i++){ - if(regions[i] == NULL) break; - } - if(i == NREGIONS){ - printk("setup_range : no free regions\n"); - i = -1; - goto out; - } - - if(fd == -1) - fd = create_mem_file(len); + unsigned long kmap_vstart; - if(region == NULL){ - region = alloc_bootmem_low_pages(sizeof(*region)); - if(region == NULL) - panic("Failed to allocating mem_region"); - } + /* cache the first kmap pte */ + kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - *region = ((struct mem_region) { .driver = driver, - .start_pfn = pfn, - .start = start, - .len = len, - .fd = fd } ); - regions[i] = region; - out: - up(®ions_sem); - return(i); + kmap_prot = PAGE_KERNEL; } -#ifdef CONFIG_HIGHMEM static void init_highmem(void) { pgd_t *pgd; @@ -268,63 +174,20 @@ static void init_highmem(void) kmap_init(); } - -void setup_highmem(unsigned long len) -{ - struct mem_region *region; - struct page *page, *map; - unsigned long phys; - int i, cur, index; - - phys = physmem_size; - do { - cur = min(len, (unsigned long) REGION_SIZE); - i = setup_one_range(-1, NULL, -1, phys >> PAGE_SHIFT, cur, - NULL); - if(i == -1){ - printk("setup_highmem - setup_one_range failed\n"); - return; - } - region = regions[i]; - index = phys / PAGE_SIZE; - region->mem_map = &mem_map[index]; - - map = region->mem_map; - for(i = 0; i < (cur >> PAGE_SHIFT); i++){ - page = &map[i]; - ClearPageReserved(page); - set_bit(PG_highmem, &page->flags); - atomic_set(&page->count, 1); - __free_page(page); - } - phys += cur; - len -= cur; - } while(len > 0); -} -#endif +#endif /* CONFIG_HIGHMEM */ void paging_init(void) { - struct mem_region *region; - unsigned long zones_size[MAX_NR_ZONES], start, end, vaddr; - int i, index; + unsigned long zones_size[MAX_NR_ZONES], vaddr; + int i; empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); for(i=0;i> PAGE_SHIFT) - - (uml_physmem >> PAGE_SHIFT); + zones_size[0] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT); zones_size[2] = highmem >> PAGE_SHIFT; free_area_init(zones_size); - start = phys_region_index(__pa(uml_physmem)); - end = phys_region_index(__pa(high_physmem - 1)); - for(i = start; i <= end; i++){ - region = regions[i]; - index = (region->start - uml_physmem) / PAGE_SIZE; - region->mem_map = &mem_map[index]; - if(i > start) free_bootmem(__pa(region->start), region->len); - } /* * Fixed mappings, only the page table structure has to be @@ -335,15 +198,33 @@ void paging_init(void) #ifdef CONFIG_HIGHMEM init_highmem(); - setup_highmem(highmem); #endif } -pte_t __bad_page(void) +struct page *arch_validate(struct page *page, int mask, int order) { - clear_page(empty_bad_page); - return pte_mkdirty(mk_pte((struct page *) empty_bad_page, - PAGE_SHARED)); + unsigned long addr, zero = 0; + int i; + + again: + if(page == NULL) return(page); + if(PageHighMem(page)) return(page); + + addr = (unsigned long) page_address(page); + for(i = 0; i < (1 << order); i++){ + current->thread.fault_addr = (void *) addr; + if(__do_copy_to_user((void *) addr, &zero, + sizeof(zero), + ¤t->thread.fault_addr, + ¤t->thread.fault_catcher)){ + if(!(mask & __GFP_WAIT)) return(NULL); + else break; + } + addr += PAGE_SIZE; + } + if(i == (1 << order)) return(page); + page = alloc_pages(mask, order); + goto again; } /* This can't do anything because nothing in the kernel image can be freed @@ -401,395 +282,6 @@ void show_mem(void) printk("%d pages swap cached\n", cached); } -static int __init uml_mem_setup(char *line, int *add) -{ - char *retptr; - physmem_size = memparse(line,&retptr); - return 0; -} -__uml_setup("mem=", uml_mem_setup, -"mem=\n" -" This controls how much \"physical\" memory the kernel allocates\n" -" for the system. The size is specified as a number followed by\n" -" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" -" This is not related to the amount of memory in the physical\n" -" machine. It can be more, and the excess, if it's ever used, will\n" -" just be swapped out.\n Example: mem=64M\n\n" -); - -struct page *arch_validate(struct page *page, int mask, int order) -{ - unsigned long addr, zero = 0; - int i; - - again: - if(page == NULL) return(page); - if(PageHighMem(page)) return(page); - - addr = (unsigned long) page_address(page); - for(i = 0; i < (1 << order); i++){ - current->thread.fault_addr = (void *) addr; - if(__do_copy_to_user((void *) addr, &zero, - sizeof(zero), - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher)){ - if(!(mask & __GFP_WAIT)) return(NULL); - else break; - } - addr += PAGE_SIZE; - } - if(i == (1 << order)) return(page); - page = alloc_pages(mask, order); - goto again; -} - -DECLARE_MUTEX(vm_reserved_sem); -static struct list_head vm_reserved = LIST_HEAD_INIT(vm_reserved); - -/* Static structures, linked in to the list in early boot */ -static struct vm_reserved head = { - .list = LIST_HEAD_INIT(head.list), - .start = 0, - .end = 0xffffffff -}; - -static struct vm_reserved tail = { - .list = LIST_HEAD_INIT(tail.list), - .start = 0, - .end = 0xffffffff -}; - -void set_usable_vm(unsigned long start, unsigned long end) -{ - list_add(&head.list, &vm_reserved); - list_add(&tail.list, &head.list); - head.end = start; - tail.start = end; -} - -int reserve_vm(unsigned long start, unsigned long end, void *e) - -{ - struct vm_reserved *entry = e, *reserved, *prev; - struct list_head *ele; - int err; - - down(&vm_reserved_sem); - list_for_each(ele, &vm_reserved){ - reserved = list_entry(ele, struct vm_reserved, list); - if(reserved->start >= end) goto found; - } - panic("Reserved vm out of range"); - found: - prev = list_entry(ele->prev, struct vm_reserved, list); - if(prev->end > start) - panic("Can't reserve vm"); - if(entry == NULL) - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if(entry == NULL){ - printk("reserve_vm : Failed to allocate entry\n"); - err = -ENOMEM; - goto out; - } - *entry = ((struct vm_reserved) - { .list = LIST_HEAD_INIT(entry->list), - .start = start, - .end = end }); - list_add(&entry->list, &prev->list); - err = 0; - out: - up(&vm_reserved_sem); - return(0); -} - -unsigned long get_vm(unsigned long len) -{ - struct vm_reserved *this, *next; - struct list_head *ele; - unsigned long start; - int err; - - down(&vm_reserved_sem); - list_for_each(ele, &vm_reserved){ - this = list_entry(ele, struct vm_reserved, list); - next = list_entry(ele->next, struct vm_reserved, list); - if((this->start < next->start) && - (this->end + len + PAGE_SIZE <= next->start)) - goto found; - } - up(&vm_reserved_sem); - return(0); - found: - up(&vm_reserved_sem); - start = (unsigned long) UML_ROUND_UP(this->end) + PAGE_SIZE; - err = reserve_vm(start, start + len, NULL); - if(err) return(0); - return(start); -} - -int nregions(void) -{ - return(NREGIONS); -} - -void setup_range(int fd, char *driver, unsigned long start, unsigned long pfn, - unsigned long len, int need_vm, struct mem_region *region, - void *reserved) -{ - int i, cur; - - do { - cur = min(len, (unsigned long) REGION_SIZE); - i = setup_one_range(fd, driver, start, pfn, cur, region); - region = regions[i]; - if(need_vm && setup_region(region, reserved)){ - kfree(region); - regions[i] = NULL; - return; - } - start += cur; - if(pfn != -1) pfn += cur; - len -= cur; - } while(len > 0); -} - -struct iomem { - char *name; - int fd; - unsigned long size; -}; - -/* iomem regions can only be added on the command line at the moment. - * Locking will be needed when they can be added via mconsole. - */ - -struct iomem iomem_regions[NREGIONS] = { [ 0 ... NREGIONS - 1 ] = - { .name = NULL, - .fd = -1, - .size = 0 } }; - -int num_iomem_regions = 0; - -void add_iomem(char *name, int fd, unsigned long size) -{ - if(num_iomem_regions == sizeof(iomem_regions)/sizeof(iomem_regions[0])) - return; - size = (size + PAGE_SIZE - 1) & PAGE_MASK; - iomem_regions[num_iomem_regions++] = - ((struct iomem) { .name = name, - .fd = fd, - .size = size } ); -} - -int setup_iomem(void) -{ - struct iomem *iomem; - int i; - - for(i = 0; i < num_iomem_regions; i++){ - iomem = &iomem_regions[i]; - setup_range(iomem->fd, iomem->name, -1, -1, iomem->size, 1, - NULL, NULL); - } - return(0); -} - -__initcall(setup_iomem); - -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) - -/* Changed during early boot */ -static struct mem_region physmem_region; -static struct vm_reserved physmem_reserved; - -void setup_physmem(unsigned long start, unsigned long reserve_end, - unsigned long len) -{ - struct mem_region *region = &physmem_region; - struct vm_reserved *reserved = &physmem_reserved; - unsigned long cur, pfn = 0; - int do_free = 1, bootmap_size; - - do { - cur = min(len, (unsigned long) REGION_SIZE); - if(region == NULL) - region = alloc_bootmem_low_pages(sizeof(*region)); - if(reserved == NULL) - reserved = alloc_bootmem_low_pages(sizeof(*reserved)); - if((region == NULL) || (reserved == NULL)) - panic("Couldn't allocate physmem region or vm " - "reservation\n"); - setup_range(-1, NULL, start, pfn, cur, 1, region, reserved); - - if(do_free){ - unsigned long reserve = reserve_end - start; - int pfn = PFN_UP(__pa(reserve_end)); - int delta = (len - reserve) >> PAGE_SHIFT; - - bootmap_size = init_bootmem(pfn, pfn + delta); - free_bootmem(__pa(reserve_end) + bootmap_size, - cur - bootmap_size - reserve); - do_free = 0; - } - start += cur; - pfn += cur >> PAGE_SHIFT; - len -= cur; - region = NULL; - reserved = NULL; - } while(len > 0); -} - -struct mem_region *phys_region(unsigned long phys) -{ - unsigned int n = phys_region_index(phys); - - if(regions[n] == NULL) - panic("Physical address in uninitialized region"); - return(regions[n]); -} - -unsigned long phys_offset(unsigned long phys) -{ - return(phys_addr(phys)); -} - -struct page *phys_mem_map(unsigned long phys) -{ - return((struct page *) phys_region(phys)->mem_map); -} - -struct page *pte_mem_map(pte_t pte) -{ - return(phys_mem_map(pte_val(pte))); -} - -struct mem_region *page_region(struct page *page, int *index_out) -{ - int i; - struct mem_region *region; - struct page *map; - - for(i = 0; i < NREGIONS; i++){ - region = regions[i]; - if(region == NULL) continue; - map = region->mem_map; - if((page >= map) && (page < &map[region->len >> PAGE_SHIFT])){ - if(index_out != NULL) *index_out = i; - return(region); - } - } - panic("No region found for page"); - return(NULL); -} - -unsigned long page_to_pfn(struct page *page) -{ - struct mem_region *region = page_region(page, NULL); - - return(region->start_pfn + (page - (struct page *) region->mem_map)); -} - -struct mem_region *pfn_to_region(unsigned long pfn, int *index_out) -{ - struct mem_region *region; - int i; - - for(i = 0; i < NREGIONS; i++){ - region = regions[i]; - if(region == NULL) - continue; - - if((region->start_pfn <= pfn) && - (region->start_pfn + (region->len >> PAGE_SHIFT) > pfn)){ - if(index_out != NULL) - *index_out = i; - return(region); - } - } - return(NULL); -} - -struct page *pfn_to_page(unsigned long pfn) -{ - struct mem_region *region = pfn_to_region(pfn, NULL); - struct page *mem_map = (struct page *) region->mem_map; - - return(&mem_map[pfn - region->start_pfn]); -} - -unsigned long phys_to_pfn(unsigned long p) -{ - struct mem_region *region = regions[phys_region_index(p)]; - - return(region->start_pfn + (phys_addr(p) >> PAGE_SHIFT)); -} - -unsigned long pfn_to_phys(unsigned long pfn) -{ - int n; - struct mem_region *region = pfn_to_region(pfn, &n); - - return(mk_phys((pfn - region->start_pfn) << PAGE_SHIFT, n)); -} - -struct page *page_mem_map(struct page *page) -{ - return((struct page *) page_region(page, NULL)->mem_map); -} - -extern unsigned long region_pa(void *virt) -{ - struct mem_region *region; - unsigned long addr = (unsigned long) virt; - int i; - - for(i = 0; i < NREGIONS; i++){ - region = regions[i]; - if(region == NULL) continue; - if((region->start <= addr) && - (addr <= region->start + region->len)) - return(mk_phys(addr - region->start, i)); - } - panic("region_pa : no region for virtual address"); - return(0); -} - -extern void *region_va(unsigned long phys) -{ - return((void *) (phys_region(phys)->start + phys_addr(phys))); -} - -unsigned long page_to_phys(struct page *page) -{ - int n; - struct mem_region *region = page_region(page, &n); - struct page *map = region->mem_map; - return(mk_phys((page - map) << PAGE_SHIFT, n)); -} - -struct page *phys_to_page(unsigned long phys) -{ - struct page *mem_map; - - mem_map = phys_mem_map(phys); - return(mem_map + (phys_offset(phys) >> PAGE_SHIFT)); -} - -static int setup_mem_maps(void) -{ - struct mem_region *region; - int i; - - for(i = 0; i < NREGIONS; i++){ - region = regions[i]; - if((region != NULL) && (region->fd > 0)) init_maps(region); - } - return(0); -} - -__initcall(setup_mem_maps); - /* * Allocate and free page tables. */ --- linux-2.6.2-rc1/arch/um/kernel/mem_user.c 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/um/kernel/mem_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -34,10 +34,9 @@ #include #include #include -#include #include #include -#include +#include #include #include #include "kern_util.h" @@ -47,105 +46,145 @@ #include "init.h" #include "os.h" #include "tempfile.h" +#include "kern_constants.h" extern struct mem_region physmem_region; #define TEMPNAME_TEMPLATE "vm_file-XXXXXX" -int create_mem_file(unsigned long len) +static int create_tmp_file(unsigned long len) { - int fd; + int fd, err; char zero; fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1); - if (fchmod(fd, 0777) < 0){ - perror("fchmod"); + if(fd < 0) { + os_print_error(fd, "make_tempfile"); + exit(1); + } + + err = os_mode_fd(fd, 0777); + if(err < 0){ + os_print_error(err, "os_mode_fd"); exit(1); } - if(os_seek_file(fd, len) < 0){ - perror("lseek"); + err = os_seek_file(fd, len); + if(err < 0){ + os_print_error(err, "os_seek_file"); exit(1); } zero = 0; - if(write(fd, &zero, 1) != 1){ - perror("write"); + err = os_write_file(fd, &zero, 1); + if(err != 1){ + os_print_error(err, "os_write_file"); exit(1); } - if(fcntl(fd, F_SETFD, 1) != 0) - perror("Setting FD_CLOEXEC failed"); + return(fd); } -int setup_region(struct mem_region *region, void *entry) +static int have_devanon = 0; + +void check_devanon(void) +{ + int fd; + + printk("Checking for /dev/anon on the host..."); + fd = open("/dev/anon", O_RDWR); + if(fd < 0){ + printk("Not available (open failed with errno %d)\n", errno); + return; + } + + printk("OK\n"); + have_devanon = 1; +} + +static int create_anon_file(unsigned long len) { - void *loc, *start; - char *driver; - int err, offset; - - if(region->start != -1){ - err = reserve_vm(region->start, - region->start + region->len, entry); - if(err){ - printk("setup_region : failed to reserve " - "0x%x - 0x%x for driver '%s'\n", - region->start, - region->start + region->len, - region->driver); - return(-1); - } - } - else region->start = get_vm(region->len); - if(region->start == 0){ - if(region->driver == NULL) driver = "physmem"; - else driver = region->driver; - printk("setup_region : failed to find vm for " - "driver '%s' (length %d)\n", driver, region->len); - return(-1); - } - if(region->start == uml_physmem){ - start = (void *) uml_reserved; - offset = uml_reserved - uml_physmem; - } - else { - start = (void *) region->start; - offset = 0; - } - - loc = mmap(start, region->len - offset, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, region->fd, offset); - if(loc != start){ - perror("Mapping memory"); + void *addr; + int fd; + + fd = open("/dev/anon", O_RDWR); + if(fd < 0) { + os_print_error(fd, "opening /dev/anon"); exit(1); } - return(0); + + addr = mmap(NULL, len, PROT_READ | PROT_WRITE , MAP_PRIVATE, fd, 0); + if(addr == MAP_FAILED){ + os_print_error((int) addr, "mapping physmem file"); + exit(1); + } + munmap(addr, len); + + return(fd); +} + +int create_mem_file(unsigned long len) +{ + int err, fd; + + if(have_devanon) + fd = create_anon_file(len); + else fd = create_tmp_file(len); + + err = os_set_exec_close(fd, 1); + if(err < 0) + os_print_error(err, "exec_close"); + return(fd); } +struct iomem_region *iomem_regions = NULL; +int iomem_size = 0; + static int __init parse_iomem(char *str, int *add) { - struct stat buf; + struct iomem_region *new; + struct uml_stat buf; char *file, *driver; - int fd; + int fd, err; driver = str; file = strchr(str,','); if(file == NULL){ - printk("parse_iomem : failed to parse iomem\n"); - return(1); + printf("parse_iomem : failed to parse iomem\n"); + goto out; } *file = '\0'; file++; fd = os_open_file(file, of_rdwr(OPENFLAGS()), 0); if(fd < 0){ - printk("parse_iomem - Couldn't open io file, errno = %d\n", - errno); - return(1); - } - if(fstat(fd, &buf) < 0) { - printk("parse_iomem - cannot fstat file, errno = %d\n", errno); - return(1); + os_print_error(fd, "parse_iomem - Couldn't open io file"); + goto out; } - add_iomem(driver, fd, buf.st_size); + + err = os_stat_fd(fd, &buf); + if(err < 0){ + os_print_error(err, "parse_iomem - cannot stat_fd file"); + goto out_close; + } + + new = malloc(sizeof(*new)); + if(new == NULL){ + perror("Couldn't allocate iomem_region struct"); + goto out_close; + } + + *new = ((struct iomem_region) { .next = iomem_regions, + .driver = driver, + .fd = fd, + .size = buf.ust_size, + .phys = 0, + .virt = 0 }); + iomem_regions = new; + iomem_size += new->size + UM_KERN_PAGE_SIZE; + return(0); + out_close: + os_close_file(fd); + out: + return(1); } __uml_setup("iomem=", parse_iomem, @@ -153,73 +192,20 @@ __uml_setup("iomem=", parse_iomem, " Configure as an IO memory region named .\n\n" ); -#ifdef notdef -int logging = 0; -int logging_fd = -1; - -int logging_line = 0; -char logging_buf[256]; - -void log(char *fmt, ...) -{ - va_list ap; - struct timeval tv; - struct openflags flags; - - if(logging == 0) return; - if(logging_fd < 0){ - flags = of_create(of_trunc(of_rdrw(OPENFLAGS()))); - logging_fd = os_open_file("log", flags, 0644); - } - gettimeofday(&tv, NULL); - sprintf(logging_buf, "%d\t %u.%u ", logging_line++, tv.tv_sec, - tv.tv_usec); - va_start(ap, fmt); - vsprintf(&logging_buf[strlen(logging_buf)], fmt, ap); - va_end(ap); - write(logging_fd, logging_buf, strlen(logging_buf)); -} -#endif - -int map_memory(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x) -{ - struct mem_region *region = phys_region(phys); - - return(os_map_memory((void *) virt, region->fd, phys_offset(phys), len, - r, w, x)); -} - int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x, int must_succeed) { - if(os_protect_memory((void *) addr, len, r, w, x) < 0){ + int err; + + err = os_protect_memory((void *) addr, len, r, w, x); + if(err < 0){ if(must_succeed) - panic("protect failed, errno = %d", errno); - else return(-errno); + panic("protect failed, err = %d", -err); + else return(err); } return(0); } -unsigned long find_iomem(char *driver, unsigned long *len_out) -{ - struct mem_region *region; - int i, n; - - n = nregions(); - for(i = 0; i < n; i++){ - region = regions[i]; - if(region == NULL) continue; - if((region->driver != NULL) && - !strcmp(region->driver, driver)){ - *len_out = region->len; - return(region->start); - } - } - *len_out = 0; - return 0; -} - /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/kernel/physmem.c 2004-01-21 23:30:29.000000000 -0800 @@ -0,0 +1,349 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/mm.h" +#include "linux/ghash.h" +#include "linux/slab.h" +#include "linux/vmalloc.h" +#include "linux/bootmem.h" +#include "asm/types.h" +#include "asm/pgtable.h" +#include "kern_util.h" +#include "user_util.h" +#include "mode_kern.h" +#include "mem.h" +#include "mem_user.h" +#include "os.h" +#include "kern.h" +#include "init.h" + +#define PHYS_HASHSIZE (8192) + +struct phys_desc; + +DEF_HASH_STRUCTS(virtmem, PHYS_HASHSIZE, struct phys_desc); + +struct phys_desc { + struct virtmem_ptrs virt_ptrs; + int fd; + __u64 offset; + void *virt; + unsigned long phys; +}; + +struct virtmem_table virtmem_hash; + +static int virt_cmp(void *virt1, void *virt2) +{ + return(virt1 != virt2); +} + +static int virt_hash(void *virt) +{ + unsigned long addr = ((unsigned long) virt) >> PAGE_SHIFT; + return(addr % PHYS_HASHSIZE); +} + +DEF_HASH(static, virtmem, struct phys_desc, virt_ptrs, void *, virt, virt_cmp, + virt_hash); + +int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w) +{ + struct phys_desc *desc; + unsigned long phys; + int err; + + virt = (void *) ((unsigned long) virt & PAGE_MASK); + err = os_map_memory(virt, fd, offset, PAGE_SIZE, 1, w, 0); + if(err) + goto out; + + phys = __pa(virt); + if(find_virtmem_hash(&virtmem_hash, virt) != NULL) + panic("Address 0x%p is already substituted\n", virt); + + err = -ENOMEM; + desc = kmalloc(sizeof(*desc), GFP_ATOMIC); + if(desc == NULL) + goto out; + + *desc = ((struct phys_desc) { .virt_ptrs = { NULL, NULL }, + .fd = fd, + .offset = offset, + .virt = virt, + .phys = __pa(virt) }); + insert_virtmem_hash(&virtmem_hash, desc); + err = 0; + out: + return(err); +} + +static int physmem_fd = -1; + +int physmem_remove_mapping(void *virt) +{ + struct phys_desc *desc; + int err; + + virt = (void *) ((unsigned long) virt & PAGE_MASK); + desc = find_virtmem_hash(&virtmem_hash, virt); + if(desc == NULL) + return(0); + + remove_virtmem_hash(&virtmem_hash, desc); + kfree(desc); + + err = os_map_memory(virt, physmem_fd, __pa(virt), PAGE_SIZE, 1, 1, 0); + if(err) + panic("Failed to unmap block device page from physical memory, " + "errno = %d", -err); + return(1); +} + +void arch_free_page(struct page *page, int order) +{ + void *virt; + int i; + + for(i = 0; i < 1 << order; i++){ + virt = __va(page_to_phys(page + i)); + physmem_remove_mapping(virt); + } +} + +int is_remapped(void *virt) +{ + return(find_virtmem_hash(&virtmem_hash, virt) != NULL); +} + +/* Changed during early boot */ +unsigned long high_physmem; + +extern unsigned long physmem_size; + +void *to_virt(unsigned long phys) +{ + return((void *) uml_physmem + phys); +} + +unsigned long to_phys(void *virt) +{ + return(((unsigned long) virt) - uml_physmem); +} + +int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) +{ + struct page *p, *map; + unsigned long phys_len, phys_pages, highmem_len, highmem_pages; + unsigned long iomem_len, iomem_pages, total_len, total_pages; + int i; + + phys_pages = physmem >> PAGE_SHIFT; + phys_len = phys_pages * sizeof(struct page); + + iomem_pages = iomem >> PAGE_SHIFT; + iomem_len = iomem_pages * sizeof(struct page); + + highmem_pages = highmem >> PAGE_SHIFT; + highmem_len = highmem_pages * sizeof(struct page); + + total_pages = phys_pages + iomem_pages + highmem_pages; + total_len = phys_len + iomem_pages + highmem_len; + + if(kmalloc_ok){ + map = kmalloc(total_len, GFP_KERNEL); + if(map == NULL) + map = vmalloc(total_len); + } + else map = alloc_bootmem_low_pages(total_len); + + if(map == NULL) + return(-ENOMEM); + + for(i = 0; i < total_pages; i++){ + p = &map[i]; + set_page_count(p, 0); + SetPageReserved(p); + INIT_LIST_HEAD(&p->list); + } + + mem_map = map; + max_mapnr = total_pages; + return(0); +} + +struct page *phys_to_page(const unsigned long phys) +{ + return(&mem_map[phys >> PAGE_SHIFT]); +} + +struct page *__virt_to_page(const unsigned long virt) +{ + return(&mem_map[__pa(virt) >> PAGE_SHIFT]); +} + +unsigned long page_to_phys(struct page *page) +{ + return((page - mem_map) << PAGE_SHIFT); +} + +pte_t mk_pte(struct page *page, pgprot_t pgprot) +{ + pte_t pte; + + pte_val(pte) = page_to_phys(page) + pgprot_val(pgprot); + if(pte_present(pte)) pte_mknewprot(pte_mknewpage(pte)); + return(pte); +} + +/* Changed during early boot */ +static unsigned long kmem_top = 0; + +unsigned long get_kmem_end(void) +{ + if(kmem_top == 0) + kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); + return(kmem_top); +} + +void map_memory(unsigned long virt, unsigned long phys, unsigned long len, + int r, int w, int x) +{ + __u64 offset; + int fd, err; + + fd = phys_mapping(phys, &offset); + err = os_map_memory((void *) virt, fd, offset, len, r, w, x); + if(err) + panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, " + "err = %d\n", virt, fd, offset, len, r, w, x, err); +} + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) + +void setup_physmem(unsigned long start, unsigned long reserve_end, + unsigned long len, unsigned long highmem) +{ + unsigned long reserve = reserve_end - start; + int pfn = PFN_UP(__pa(reserve_end)); + int delta = (len - reserve) >> PAGE_SHIFT; + int err, offset, bootmap_size; + + physmem_fd = create_mem_file(len + highmem); + + offset = uml_reserved - uml_physmem; + err = os_map_memory((void *) uml_reserved, physmem_fd, offset, + len - offset, 1, 1, 0); + if(err < 0){ + os_print_error(err, "Mapping memory"); + exit(1); + } + + bootmap_size = init_bootmem(pfn, pfn + delta); + free_bootmem(__pa(reserve_end) + bootmap_size, + len - bootmap_size - reserve); +} + +int phys_mapping(unsigned long phys, __u64 *offset_out) +{ + struct phys_desc *desc = find_virtmem_hash(&virtmem_hash, + __va(phys & PAGE_MASK)); + int fd = -1; + + if(desc != NULL){ + fd = desc->fd; + *offset_out = desc->offset; + } + else if(phys < physmem_size){ + fd = physmem_fd; + *offset_out = phys; + } + else if(phys < __pa(end_iomem)){ + struct iomem_region *region = iomem_regions; + + while(region != NULL){ + if((phys >= region->phys) && + (phys < region->phys + region->size)){ + fd = region->fd; + *offset_out = phys - region->phys; + break; + } + region = region->next; + } + } + else if(phys < __pa(end_iomem) + highmem){ + fd = physmem_fd; + *offset_out = phys - iomem_size; + } + + return(fd); +} + +static int __init uml_mem_setup(char *line, int *add) +{ + char *retptr; + physmem_size = memparse(line,&retptr); + return 0; +} +__uml_setup("mem=", uml_mem_setup, +"mem=\n" +" This controls how much \"physical\" memory the kernel allocates\n" +" for the system. The size is specified as a number followed by\n" +" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" +" This is not related to the amount of memory in the host. It can\n" +" be more, and the excess, if it's ever used, will just be swapped out.\n" +" Example: mem=64M\n\n" +); + +unsigned long find_iomem(char *driver, unsigned long *len_out) +{ + struct iomem_region *region = iomem_regions; + + while(region != NULL){ + if(!strcmp(region->driver, driver)){ + *len_out = region->size; + return(region->virt); + } + } + + return(0); +} + +int setup_iomem(void) +{ + struct iomem_region *region = iomem_regions; + unsigned long iomem_start = high_physmem + PAGE_SIZE; + int err; + + while(region != NULL){ + err = os_map_memory((void *) iomem_start, region->fd, 0, + region->size, 1, 1, 0); + if(err) + printk("Mapping iomem region for driver '%s' failed, " + "errno = %d\n", region->driver, -err); + else { + region->virt = iomem_start; + region->phys = __pa(region->virt); + } + + iomem_start += region->size + PAGE_SIZE; + region = region->next; + } + + return(0); +} + +__initcall(setup_iomem); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.2-rc1/arch/um/kernel/process.c 2003-06-14 12:18:32.000000000 -0700 +++ 25/arch/um/kernel/process.c 2004-01-21 23:30:29.000000000 -0800 @@ -9,12 +9,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include #include @@ -58,7 +56,11 @@ void init_new_thread_signals(int altstac { int flags = altstack ? SA_ONSTACK : 0; - set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, + /* NODEFER is set here because SEGV isn't turned back on when the + * handler is ready to receive signals. This causes any segfault + * during a copy_user to kill the process because the fault is blocked. + */ + set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags | SA_NODEFER, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); @@ -72,7 +74,6 @@ void init_new_thread_signals(int altstac SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); set_handler(SIGUSR2, (__sighandler_t) sig_handler, SA_NOMASK | flags, -1); - (void) CHOOSE_MODE(signal(SIGCHLD, SIG_IGN), (void *) 0); signal(SIGHUP, SIG_IGN); init_irq_signals(altstack); @@ -123,11 +124,12 @@ int start_fork_tramp(void *thread_arg, u /* Start the process and wait for it to kill itself */ new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg); if(new_pid < 0) return(-errno); - while((err = waitpid(new_pid, &status, 0) < 0) && (errno == EINTR)) ; + while(((err = waitpid(new_pid, &status, 0)) < 0) && (errno == EINTR)) ; if(err < 0) panic("Waiting for outer trampoline failed - errno = %d", errno); if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL)) - panic("outer trampoline didn't exit with SIGKILL"); + panic("outer trampoline didn't exit with SIGKILL, " + "status = %d", status); return(arg.pid); } @@ -138,7 +140,7 @@ void suspend_new_thread(int fd) os_stop_process(os_getpid()); - if(read(fd, &c, sizeof(c)) != sizeof(c)) + if(os_read_file(fd, &c, sizeof(c)) != sizeof(c)) panic("read failed in suspend_new_thread"); } @@ -233,7 +235,7 @@ int run_kernel_thread(int (*fn)(void *), int n; *jmp_ptr = &buf; - n = setjmp(buf); + n = sigsetjmp(buf, 1); if(n != 0) return(n); (*fn)(arg); @@ -273,7 +275,7 @@ int can_do_skas(void) stop_ptraced_child(pid, stack, 1); printf("Checking for /proc/mm..."); - if(access("/proc/mm", W_OK)){ + if(os_access("/proc/mm", OS_ACC_W_OK) < 0){ printf("not found\n"); ret = 0; } --- linux-2.6.2-rc1/arch/um/kernel/process_kern.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/process_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -16,6 +16,7 @@ #include "linux/module.h" #include "linux/init.h" #include "linux/capability.h" +#include "linux/spinlock.h" #include "asm/unistd.h" #include "asm/mman.h" #include "asm/segment.h" @@ -23,7 +24,6 @@ #include "asm/pgtable.h" #include "asm/processor.h" #include "asm/tlbflush.h" -#include "asm/spinlock.h" #include "asm/uaccess.h" #include "asm/user.h" #include "user_util.h" @@ -52,17 +52,12 @@ struct cpu_task cpu_tasks[NR_CPUS] = { [ struct task_struct *get_task(int pid, int require) { - struct task_struct *task, *ret; + struct task_struct *ret; - ret = NULL; read_lock(&tasklist_lock); - for_each_process(task){ - if(task->pid == pid){ - ret = task; - break; - } - } + ret = find_task_by_pid(pid); read_unlock(&tasklist_lock); + if(require && (ret == NULL)) panic("get_task couldn't find a task\n"); return(ret); } @@ -95,7 +90,8 @@ unsigned long alloc_stack(int order, int int flags = GFP_KERNEL; if(atomic) flags |= GFP_ATOMIC; - if((page = __get_free_pages(flags, order)) == 0) + page = __get_free_pages(flags, order); + if(page == 0) return(0); stack_protections(page); return(page); @@ -103,13 +99,15 @@ unsigned long alloc_stack(int order, int int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { - struct task_struct *p; + int pid; current->thread.request.u.thread.proc = fn; current->thread.request.u.thread.arg = arg; - p = do_fork(CLONE_VM | flags, 0, NULL, 0, NULL, NULL); - if(IS_ERR(p)) panic("do_fork failed in kernel_thread"); - return(p->pid); + pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, NULL, 0, NULL, + NULL); + if(pid < 0) + panic("do_fork failed in kernel_thread, errno = %d", pid); + return(pid); } void switch_mm(struct mm_struct *prev, struct mm_struct *next, @@ -129,7 +127,7 @@ void set_current(void *t) { external_pid(task), task }); } -void *switch_to(void *prev, void *next, void *last) +void *_switch_to(void *prev, void *next, void *last) { return(CHOOSE_MODE(switch_to_tt(prev, next), switch_to_skas(prev, next))); @@ -149,7 +147,7 @@ void release_thread(struct task_struct * void exit_thread(void) { CHOOSE_MODE(exit_thread_tt(), exit_thread_skas()); - unprotect_stack((unsigned long) current->thread_info); + unprotect_stack((unsigned long) current_thread); } void *get_current(void) @@ -157,6 +155,10 @@ void *get_current(void) return(current); } +void prepare_to_copy(struct task_struct *tsk) +{ +} + int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, unsigned long stack_top, struct task_struct * p, struct pt_regs *regs) @@ -190,7 +192,7 @@ int current_pid(void) void default_idle(void) { - idle_timer(); + uml_idle_timer(); atomic_inc(&init_mm.mm_count); current->mm = &init_mm; @@ -367,10 +369,15 @@ int clear_user_proc(void *buf, int size) return(clear_user(buf, size)); } +int strlen_user_proc(char *str) +{ + return(strlen_user(str)); +} + int smp_sigio_handler(void) { #ifdef CONFIG_SMP - int cpu = current->thread_info->cpu; + int cpu = current_thread->cpu; IPI_handler(cpu); if(cpu != 0) return(1); @@ -385,7 +392,7 @@ int um_in_interrupt(void) int cpu(void) { - return(current->thread_info->cpu); + return(current_thread->cpu); } /* --- linux-2.6.2-rc1/arch/um/kernel/ptrace.c 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/um/kernel/ptrace.c 2004-01-21 23:30:29.000000000 -0800 @@ -311,11 +311,8 @@ void syscall_trace(void) /* the 0x80 provides a way for the tracing parent to distinguish between a syscall stop and SIGTRAP delivery */ - current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0); - current->state = TASK_STOPPED; - notify_parent(current, SIGCHLD); - schedule(); + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); /* * this isn't the same as continuing with a signal, but it will do --- linux-2.6.2-rc1/arch/um/kernel/reboot.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/reboot.c 2004-01-21 23:30:29.000000000 -0800 @@ -15,6 +15,7 @@ #ifdef CONFIG_SMP static void kill_idlers(int me) { +#ifdef CONFIG_MODE_TT struct task_struct *p; int i; @@ -23,6 +24,7 @@ static void kill_idlers(int me) if((p != NULL) && (p->thread.mode.tt.extern_pid != me)) os_kill_process(p->thread.mode.tt.extern_pid, 0); } +#endif } #endif --- linux-2.6.2-rc1/arch/um/kernel/sigio_kern.c 2003-06-14 12:18:04.000000000 -0700 +++ 25/arch/um/kernel/sigio_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -6,18 +6,21 @@ #include "linux/kernel.h" #include "linux/list.h" #include "linux/slab.h" -#include "asm/irq.h" +#include "linux/signal.h" +#include "linux/interrupt.h" #include "init.h" #include "sigio.h" #include "irq_user.h" +#include "irq_kern.h" /* Protected by sigio_lock() called from write_sigio_workaround */ static int sigio_irq_fd = -1; -void sigio_interrupt(int irq, void *data, struct pt_regs *unused) +irqreturn_t sigio_interrupt(int irq, void *data, struct pt_regs *unused) { read_sigio_fd(sigio_irq_fd); reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ); + return(IRQ_HANDLED); } int write_sigio_irq(int fd) --- linux-2.6.2-rc1/arch/um/kernel/sigio_user.c 2003-06-14 12:18:35.000000000 -0700 +++ 25/arch/um/kernel/sigio_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -26,7 +25,7 @@ int pty_output_sigio = 0; int pty_close_sigio = 0; /* Used as a flag during SIGIO testing early in boot */ -static int got_sigio = 0; +static volatile int got_sigio = 0; void __init handler(int sig) { @@ -45,7 +44,7 @@ static void openpty_cb(void *arg) info->err = 0; if(openpty(&info->master, &info->slave, NULL, NULL, NULL)) - info->err = errno; + info->err = -errno; } void __init check_one_sigio(void (*proc)(int, int)) @@ -53,11 +52,11 @@ void __init check_one_sigio(void (*proc) struct sigaction old, new; struct termios tt; struct openpty_arg pty = { .master = -1, .slave = -1 }; - int master, slave, flags; + int master, slave, err; initial_thread_cb(openpty_cb, &pty); if(pty.err){ - printk("openpty failed, errno = %d\n", pty.err); + printk("openpty failed, errno = %d\n", -pty.err); return; } @@ -69,23 +68,16 @@ void __init check_one_sigio(void (*proc) return; } + /* XXX These can fail with EINTR */ if(tcgetattr(master, &tt) < 0) panic("check_sigio : tcgetattr failed, errno = %d\n", errno); cfmakeraw(&tt); if(tcsetattr(master, TCSADRAIN, &tt) < 0) panic("check_sigio : tcsetattr failed, errno = %d\n", errno); - if((flags = fcntl(master, F_GETFL)) < 0) - panic("tty_fds : fcntl F_GETFL failed, errno = %d\n", errno); - - if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || - (fcntl(master, F_SETOWN, os_getpid()) < 0)) - panic("check_sigio : fcntl F_SETFL or F_SETOWN failed, " - "errno = %d\n", errno); - - if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)) - panic("check_sigio : fcntl F_SETFL failed, errno = %d\n", - errno); + err = os_sigio_async(master, slave); + if(err < 0) + panic("tty_fds : sigio_async failed, err = %d\n", -err); if(sigaction(SIGIO, NULL, &old) < 0) panic("check_sigio : sigaction 1 failed, errno = %d\n", errno); @@ -97,8 +89,8 @@ void __init check_one_sigio(void (*proc) got_sigio = 0; (*proc)(master, slave); - close(master); - close(slave); + os_close_file(master); + os_close_file(slave); if(sigaction(SIGIO, &old, NULL) < 0) panic("check_sigio : sigaction 3 failed, errno = %d\n", errno); @@ -112,25 +104,25 @@ static void tty_output(int master, int s printk("Checking that host ptys support output SIGIO..."); memset(buf, 0, sizeof(buf)); - while(write(master, buf, sizeof(buf)) > 0) ; + + while(os_write_file(master, buf, sizeof(buf)) > 0) ; if(errno != EAGAIN) panic("check_sigio : write failed, errno = %d\n", errno); - - while(((n = read(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; + while(((n = os_read_file(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; if(got_sigio){ printk("Yes\n"); pty_output_sigio = 1; } - else if(errno == EAGAIN) printk("No, enabling workaround\n"); - else panic("check_sigio : read failed, errno = %d\n", errno); + else if(n == -EAGAIN) printk("No, enabling workaround\n"); + else panic("check_sigio : read failed, err = %d\n", n); } static void tty_close(int master, int slave) { printk("Checking that host ptys support SIGIO on close..."); - close(slave); + os_close_file(slave); if(got_sigio){ printk("Yes\n"); pty_close_sigio = 1; @@ -140,7 +132,8 @@ static void tty_close(int master, int sl void __init check_sigio(void) { - if(access("/dev/ptmx", R_OK) && access("/dev/ptyp0", R_OK)){ + if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) && + (os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){ printk("No pseudo-terminals available - skipping pty SIGIO " "check\n"); return; @@ -201,11 +194,10 @@ static int write_sigio_thread(void *unus p = &fds->poll[i]; if(p->revents == 0) continue; if(p->fd == sigio_private[1]){ - n = read(sigio_private[1], &c, sizeof(c)); + n = os_read_file(sigio_private[1], &c, sizeof(c)); if(n != sizeof(c)) printk("write_sigio_thread : " - "read failed, errno = %d\n", - errno); + "read failed, err = %d\n", -n); tmp = current_poll; current_poll = next_poll; next_poll = tmp; @@ -218,10 +210,10 @@ static int write_sigio_thread(void *unus (fds->used - i) * sizeof(*fds->poll)); } - n = write(respond_fd, &c, sizeof(c)); + n = os_write_file(respond_fd, &c, sizeof(c)); if(n != sizeof(c)) printk("write_sigio_thread : write failed, " - "errno = %d\n", errno); + "err = %d\n", -n); } } } @@ -252,15 +244,15 @@ static void update_thread(void) char c; flags = set_signals(0); - n = write(sigio_private[0], &c, sizeof(c)); + n = os_write_file(sigio_private[0], &c, sizeof(c)); if(n != sizeof(c)){ - printk("update_thread : write failed, errno = %d\n", errno); + printk("update_thread : write failed, err = %d\n", -n); goto fail; } - n = read(sigio_private[0], &c, sizeof(c)); + n = os_read_file(sigio_private[0], &c, sizeof(c)); if(n != sizeof(c)){ - printk("update_thread : read failed, errno = %d\n", errno); + printk("update_thread : read failed, err = %d\n", -n); goto fail; } @@ -271,10 +263,10 @@ static void update_thread(void) if(write_sigio_pid != -1) os_kill_process(write_sigio_pid, 1); write_sigio_pid = -1; - close(sigio_private[0]); - close(sigio_private[1]); - close(write_sigio_fds[0]); - close(write_sigio_fds[1]); + os_close_file(sigio_private[0]); + os_close_file(sigio_private[1]); + os_close_file(write_sigio_fds[0]); + os_close_file(write_sigio_fds[1]); sigio_unlock(); set_signals(flags); } @@ -369,15 +361,15 @@ void write_sigio_workaround(void) goto out; err = os_pipe(write_sigio_fds, 1, 1); - if(err){ + if(err < 0){ printk("write_sigio_workaround - os_pipe 1 failed, " - "errno = %d\n", -err); + "err = %d\n", -err); goto out; } err = os_pipe(sigio_private, 1, 1); - if(err){ + if(err < 0){ printk("write_sigio_workaround - os_pipe 2 failed, " - "errno = %d\n", -err); + "err = %d\n", -err); goto out_close1; } if(setup_initial_poll(sigio_private[1])) @@ -399,11 +391,11 @@ void write_sigio_workaround(void) os_kill_process(write_sigio_pid, 1); write_sigio_pid = -1; out_close2: - close(sigio_private[0]); - close(sigio_private[1]); + os_close_file(sigio_private[0]); + os_close_file(sigio_private[1]); out_close1: - close(write_sigio_fds[0]); - close(write_sigio_fds[1]); + os_close_file(write_sigio_fds[0]); + os_close_file(write_sigio_fds[1]); sigio_unlock(); } @@ -412,10 +404,16 @@ int read_sigio_fd(int fd) int n; char c; - n = read(fd, &c, sizeof(c)); + n = os_read_file(fd, &c, sizeof(c)); if(n != sizeof(c)){ - printk("read_sigio_fd - read failed, errno = %d\n", errno); - return(-errno); + if(n < 0) { + printk("read_sigio_fd - read failed, err = %d\n", -n); + return(n); + } + else { + printk("read_sigio_fd - short read, bytes = %d\n", n); + return(-EIO); + } } return(n); } --- linux-2.6.2-rc1/arch/um/kernel/signal_kern.c 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/um/kernel/signal_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -36,7 +36,7 @@ static void force_segv(int sig) if(sig == SIGSEGV){ struct k_sigaction *ka; - ka = ¤t->sig->action[SIGSEGV - 1]; + ka = ¤t->sighand->action[SIGSEGV - 1]; ka->sa.sa_handler = SIG_DFL; } force_sig(SIGSEGV, current); @@ -60,10 +60,10 @@ static int handle_signal(struct pt_regs int err, ret; ret = 0; + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; switch(error){ case -ERESTART_RESTARTBLOCK: - current_thread_info()->restart_block.fn = - do_no_restart_syscall; case -ERESTARTNOHAND: ret = -EINTR; break; @@ -142,7 +142,7 @@ static int kern_do_signal(struct pt_regs return(0); /* Whee! Actually deliver the signal. */ - ka = ¤t->sig->action[sig -1 ]; + ka = ¤t->sighand->action[sig -1 ]; err = handle_signal(regs, sig, ka, &info, oldset, error); if(!err) return(1); @@ -201,7 +201,7 @@ int sys_sigsuspend(int history0, int his } } -int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) +int sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) { sigset_t saveset, newset; @@ -227,20 +227,59 @@ int sys_rt_sigsuspend(sigset_t *unewset, } } +int sys_sigaction(int sig, const struct old_sigaction __user *act, + struct old_sigaction __user *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (verify_area(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +int sys_sigaltstack(const stack_t *uss, stack_t *uoss) +{ + return(do_sigaltstack(uss, uoss, PT_REGS_SP(¤t->thread.regs))); +} + +extern int userspace_pid[]; + static int copy_sc_from_user(struct pt_regs *to, void *from, struct arch_frame_data *arch) { int ret; ret = CHOOSE_MODE(copy_sc_from_user_tt(UPT_SC(&to->regs), from, arch), - copy_sc_from_user_skas(&to->regs, from)); + copy_sc_from_user_skas(userspace_pid[0], + &to->regs, from)); return(ret); } int sys_sigreturn(struct pt_regs regs) { - void *sc = sp_to_sc(PT_REGS_SP(¤t->thread.regs)); - void *mask = sp_to_mask(PT_REGS_SP(¤t->thread.regs)); + void __user *sc = sp_to_sc(PT_REGS_SP(¤t->thread.regs)); + void __user *mask = sp_to_mask(PT_REGS_SP(¤t->thread.regs)); int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); spin_lock_irq(¤t->sighand->siglock); @@ -257,8 +296,8 @@ int sys_sigreturn(struct pt_regs regs) int sys_rt_sigreturn(struct pt_regs regs) { - struct ucontext *uc = sp_to_uc(PT_REGS_SP(¤t->thread.regs)); - void *fp; + unsigned long sp = PT_REGS_SP(¤t->thread.regs); + struct ucontext __user *uc = sp_to_uc(sp); int sig_size = _NSIG_WORDS * sizeof(unsigned long); spin_lock_irq(¤t->sighand->siglock); @@ -266,7 +305,6 @@ int sys_rt_sigreturn(struct pt_regs regs sigdelsetmask(¤t->blocked, ~_BLOCKABLE); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - fp = (void *) (((unsigned long) uc) + sizeof(struct ucontext)); copy_sc_from_user(¤t->thread.regs, &uc->uc_mcontext, &signal_frame_si.common.arch); return(PT_REGS_SYSCALL_RET(¤t->thread.regs)); --- linux-2.6.2-rc1/arch/um/kernel/skas/include/mode.h 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/um/kernel/skas/include/mode.h 2004-01-21 23:30:29.000000000 -0800 @@ -12,14 +12,16 @@ extern unsigned long exec_fpx_regs[]; extern int have_fpx_regs; extern void user_time_init_skas(void); -extern int copy_sc_from_user_skas(union uml_pt_regs *regs, void *from_ptr); -extern int copy_sc_to_user_skas(void *to_ptr, void *fp, +extern int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, + void *from_ptr); +extern int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp, union uml_pt_regs *regs, unsigned long fault_addr, int fault_type); extern void sig_handler_common_skas(int sig, void *sc_ptr); extern void halt_skas(void); extern void reboot_skas(void); extern void kill_off_processes_skas(void); +extern int is_skas_winch(int pid, int fd, void *data); #endif --- linux-2.6.2-rc1/arch/um/kernel/skas/include/skas.h 2003-06-14 12:18:06.000000000 -0700 +++ 25/arch/um/kernel/skas/include/skas.h 2004-01-21 23:30:29.000000000 -0800 @@ -8,7 +8,7 @@ #include "sysdep/ptrace.h" -extern int userspace_pid; +extern int userspace_pid[]; extern void switch_threads(void *me, void *next); extern void thread_wait(void *sw, void *fb); @@ -32,7 +32,7 @@ extern int singlestepping_skas(void); extern int new_mm(int from); extern void save_registers(union uml_pt_regs *regs); extern void restore_registers(union uml_pt_regs *regs); -extern void start_userspace(void); +extern void start_userspace(int cpu); extern void init_registers(int pid); #endif --- linux-2.6.2-rc1/arch/um/kernel/skas/include/uaccess.h 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/um/kernel/skas/include/uaccess.h 2004-01-21 23:30:29.000000000 -0800 @@ -6,20 +6,10 @@ #ifndef __SKAS_UACCESS_H #define __SKAS_UACCESS_H -#include "linux/string.h" -#include "linux/sched.h" -#include "linux/err.h" -#include "asm/processor.h" -#include "asm/pgtable.h" -#include "asm/errno.h" -#include "asm/current.h" -#include "asm/a.out.h" -#include "kern_util.h" - #define access_ok_skas(type, addr, size) \ ((segment_eq(get_fs(), KERNEL_DS)) || \ (((unsigned long) (addr) < TASK_SIZE) && \ - ((unsigned long) (addr) + (size) < TASK_SIZE))) + ((unsigned long) (addr) + (size) <= TASK_SIZE))) static inline int verify_area_skas(int type, const void * addr, unsigned long size) @@ -27,197 +17,12 @@ static inline int verify_area_skas(int t return(access_ok_skas(type, addr, size) ? 0 : -EFAULT); } -static inline unsigned long maybe_map(unsigned long virt, int is_write) -{ - pte_t pte; - - void *phys = um_virt_to_phys(current, virt, &pte); - int dummy_code; - - if(IS_ERR(phys) || (is_write && !pte_write(pte))){ - if(handle_page_fault(virt, 0, is_write, 0, &dummy_code)) - return(0); - phys = um_virt_to_phys(current, virt, NULL); - } - return((unsigned long) __va((unsigned long) phys)); -} - -static inline int buffer_op(unsigned long addr, int len, - int (*op)(unsigned long addr, int len, void *arg), - void *arg) -{ - int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); - int remain = len, n; - - n = (*op)(addr, size, arg); - if(n != 0) - return(n < 0 ? remain : 0); - - addr += size; - remain -= size; - if(remain == 0) - return(0); - - while(addr < ((addr + remain) & PAGE_MASK)){ - n = (*op)(addr, PAGE_SIZE, arg); - if(n != 0) - return(n < 0 ? remain : 0); - - addr += PAGE_SIZE; - remain -= PAGE_SIZE; - } - if(remain == 0) - return(0); - - n = (*op)(addr, remain, arg); - if(n != 0) - return(n < 0 ? remain : 0); - return(0); -} - -static inline int copy_chunk_from_user(unsigned long from, int len, void *arg) -{ - unsigned long *to_ptr = arg, to = *to_ptr; - - from = maybe_map(from, 0); - if(from == 0) - return(-1); - - memcpy((void *) to, (void *) from, len); - *to_ptr += len; - return(0); -} - -static inline int copy_from_user_skas(void *to, const void *from, int n) -{ - if(segment_eq(get_fs(), KERNEL_DS)){ - memcpy(to, from, n); - return(0); - } - - return(access_ok_skas(VERIFY_READ, from, n) ? - buffer_op((unsigned long) from, n, copy_chunk_from_user, &to) : - n); -} - -static inline int copy_chunk_to_user(unsigned long to, int len, void *arg) -{ - unsigned long *from_ptr = arg, from = *from_ptr; - - to = maybe_map(to, 1); - if(to == 0) - return(-1); - - memcpy((void *) to, (void *) from, len); - *from_ptr += len; - return(0); -} - -static inline int copy_to_user_skas(void *to, const void *from, int n) -{ - if(segment_eq(get_fs(), KERNEL_DS)){ - memcpy(to, from, n); - return(0); - } - - return(access_ok_skas(VERIFY_WRITE, to, n) ? - buffer_op((unsigned long) to, n, copy_chunk_to_user, &from) : - n); -} - -static inline int strncpy_chunk_from_user(unsigned long from, int len, - void *arg) -{ - char **to_ptr = arg, *to = *to_ptr; - int n; - - from = maybe_map(from, 0); - if(from == 0) - return(-1); - - strncpy(to, (void *) from, len); - n = strnlen(to, len); - *to_ptr += n; - - if(n < len) - return(1); - return(0); -} - -static inline int strncpy_from_user_skas(char *dst, const char *src, int count) -{ - int n; - char *ptr = dst; - - if(segment_eq(get_fs(), KERNEL_DS)){ - strncpy(dst, src, count); - return(strnlen(dst, count)); - } - - if(!access_ok_skas(VERIFY_READ, src, 1)) - return(-EFAULT); - - n = buffer_op((unsigned long) src, count, strncpy_chunk_from_user, - &ptr); - if(n != 0) - return(-EFAULT); - return(strnlen(dst, count)); -} - -static inline int clear_chunk(unsigned long addr, int len, void *unused) -{ - addr = maybe_map(addr, 1); - if(addr == 0) - return(-1); - - memset((void *) addr, 0, len); - return(0); -} - -static inline int __clear_user_skas(void *mem, int len) -{ - return(buffer_op((unsigned long) mem, len, clear_chunk, NULL)); -} - -static inline int clear_user_skas(void *mem, int len) -{ - if(segment_eq(get_fs(), KERNEL_DS)){ - memset(mem, 0, len); - return(0); - } - - return(access_ok_skas(VERIFY_WRITE, mem, len) ? - buffer_op((unsigned long) mem, len, clear_chunk, NULL) : len); -} - -static inline int strnlen_chunk(unsigned long str, int len, void *arg) -{ - int *len_ptr = arg, n; - - str = maybe_map(str, 0); - if(str == 0) - return(-1); - - n = strnlen((void *) str, len); - *len_ptr += n; - - if(n < len) - return(1); - return(0); -} - -static inline int strnlen_user_skas(const void *str, int len) -{ - int count = 0, n; - - if(segment_eq(get_fs(), KERNEL_DS)) - return(strnlen(str, len) + 1); - - n = buffer_op((unsigned long) str, len, strnlen_chunk, &count); - if(n == 0) - return(count + 1); - return(-EFAULT); -} +extern int copy_from_user_skas(void *to, const void *from, int n); +extern int copy_to_user_skas(void *to, const void *from, int n); +extern int strncpy_from_user_skas(char *dst, const char *src, int count); +extern int __clear_user_skas(void *mem, int len); +extern int clear_user_skas(void *mem, int len); +extern int strnlen_user_skas(const void *str, int len); #endif --- linux-2.6.2-rc1/arch/um/kernel/skas/Makefile 2003-06-14 12:18:06.000000000 -0700 +++ 25/arch/um/kernel/skas/Makefile 2004-01-21 23:30:29.000000000 -0800 @@ -5,20 +5,24 @@ obj-y = exec_kern.o exec_user.o mem.o mem_user.o mmu.o process.o \ process_kern.o syscall_kern.o syscall_user.o time.o tlb.o trap_user.o \ - sys-$(SUBARCH)/ + uaccess.o sys-$(SUBARCH)/ + +host-progs := util/mk_ptregs +clean-files := include/skas_ptregs.h USER_OBJS = $(filter %_user.o,$(obj-y)) process.o time.o USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) -include/skas_ptregs.h : util/mk_ptregs - util/mk_ptregs > $@ - -util/mk_ptregs : - $(MAKE) -C util +$(TOPDIR)/arch/um/include/skas_ptregs.h : $(src)/util/mk_ptregs + @echo -n ' Generating $@' + @$< > $@.tmp + @if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + echo ' (unchanged)'; \ + rm -f $@.tmp; \ + else \ + echo ' (updated)'; \ + mv -f $@.tmp $@; \ + fi $(USER_OBJS) : %.o: %.c $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< - -clean : - $(MAKE) -C util clean - $(RM) -f include/skas_ptregs.h --- linux-2.6.2-rc1/arch/um/kernel/skas/mem_user.c 2003-06-14 12:18:07.000000000 -0700 +++ 25/arch/um/kernel/skas/mem_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -7,6 +7,7 @@ #include #include #include "mem_user.h" +#include "mem.h" #include "user.h" #include "os.h" #include "proc_mm.h" @@ -15,12 +16,12 @@ void map(int fd, unsigned long virt, uns int r, int w, int x) { struct proc_mm_op map; - struct mem_region *region; - int prot, n; + __u64 offset; + int prot, n, phys_fd; prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | (x ? PROT_EXEC : 0); - region = phys_region(phys); + phys_fd = phys_mapping(phys, &offset); map = ((struct proc_mm_op) { .op = MM_MMAP, .u = @@ -30,12 +31,12 @@ void map(int fd, unsigned long virt, uns .prot = prot, .flags = MAP_SHARED | MAP_FIXED, - .fd = region->fd, - .offset = phys_offset(phys) + .fd = phys_fd, + .offset = offset } } } ); n = os_write_file(fd, &map, sizeof(map)); if(n != sizeof(map)) - printk("map : /proc/mm map failed, errno = %d\n", errno); + printk("map : /proc/mm map failed, err = %d\n", -n); } int unmap(int fd, void *addr, int len) @@ -49,8 +50,13 @@ int unmap(int fd, void *addr, int len) { .addr = (unsigned long) addr, .len = len } } } ); n = os_write_file(fd, &unmap, sizeof(unmap)); - if((n != 0) && (n != sizeof(unmap))) - return(-errno); + if(n != sizeof(unmap)) { + if(n < 0) + return(n); + else if(n > 0) + return(-EIO); + } + return(0); } @@ -71,11 +77,15 @@ int protect(int fd, unsigned long addr, .prot = prot } } } ); n = os_write_file(fd, &protect, sizeof(protect)); - if((n != 0) && (n != sizeof(protect))){ + if(n != sizeof(protect)) { + if(n == 0) return(0); + if(must_succeed) - panic("protect failed, errno = %d", errno); - return(-errno); + panic("protect failed, err = %d", -n); + + return(-EIO); } + return(0); } --- linux-2.6.2-rc1/arch/um/kernel/skas/mmu.c 2003-06-14 12:18:52.000000000 -0700 +++ 25/arch/um/kernel/skas/mmu.c 2004-01-21 23:30:29.000000000 -0800 @@ -22,9 +22,11 @@ int init_new_context_skas(struct task_st else from = -1; mm->context.skas.mm_fd = new_mm(from); - if(mm->context.skas.mm_fd < 0) - panic("init_new_context_skas - new_mm failed, errno = %d\n", - mm->context.skas.mm_fd); + if(mm->context.skas.mm_fd < 0){ + printk("init_new_context_skas - new_mm failed, errno = %d\n", + mm->context.skas.mm_fd); + return(mm->context.skas.mm_fd); + } return(0); } --- linux-2.6.2-rc1/arch/um/kernel/skas/process.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/um/kernel/skas/process.c 2004-01-21 23:30:29.000000000 -0800 @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -24,6 +25,18 @@ #include "os.h" #include "proc_mm.h" #include "skas_ptrace.h" +#include "chan_user.h" + +int is_skas_winch(int pid, int fd, void *data) +{ + if(pid != getpid()) + return(0); + + register_winch_irq(-1, fd, -1, data); + return(1); +} + +/* These are set once at boot time and not changed thereafter */ unsigned long exec_regs[FRAME_SIZE]; unsigned long exec_fp_regs[HOST_FP_SIZE]; @@ -48,11 +61,11 @@ static void handle_trap(int pid, union u int err, syscall_nr, status; syscall_nr = PT_SYSCALL_NR(regs->skas.regs); + UPT_SYSCALL_NR(regs) = syscall_nr; if(syscall_nr < 1){ relay_signal(SIGTRAP, regs); return; } - UPT_SYSCALL_NR(regs) = syscall_nr; err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, __NR_getpid); if(err < 0) @@ -72,8 +85,6 @@ static void handle_trap(int pid, union u handle_syscall(regs); } -int userspace_pid; - static int userspace_tramp(void *arg) { init_new_thread_signals(0); @@ -83,7 +94,11 @@ static int userspace_tramp(void *arg) return(0); } -void start_userspace(void) +/* Each element set once, and only accessed by a single processor anyway */ +#define NR_CPUS 1 +int userspace_pid[NR_CPUS]; + +void start_userspace(int cpu) { void *stack; unsigned long sp; @@ -114,21 +129,21 @@ void start_userspace(void) if(munmap(stack, PAGE_SIZE) < 0) panic("start_userspace : munmap failed, errno = %d\n", errno); - userspace_pid = pid; + userspace_pid[cpu] = pid; } void userspace(union uml_pt_regs *regs) { - int err, status, op; + int err, status, op, pid = userspace_pid[0]; restore_registers(regs); - err = ptrace(PTRACE_SYSCALL, userspace_pid, 0, 0); + err = ptrace(PTRACE_SYSCALL, pid, 0, 0); if(err) panic("userspace - PTRACE_SYSCALL failed, errno = %d\n", errno); while(1){ - err = waitpid(userspace_pid, &status, WUNTRACED); + err = waitpid(pid, &status, WUNTRACED); if(err < 0) panic("userspace - waitpid failed, errno = %d\n", errno); @@ -139,16 +154,17 @@ void userspace(union uml_pt_regs *regs) if(WIFSTOPPED(status)){ switch(WSTOPSIG(status)){ case SIGSEGV: - handle_segv(userspace_pid); + handle_segv(pid); break; case SIGTRAP: - handle_trap(userspace_pid, regs); + handle_trap(pid, regs); break; case SIGIO: case SIGVTALRM: case SIGILL: case SIGBUS: case SIGFPE: + case SIGWINCH: user_signal(WSTOPSIG(status), regs); break; default: @@ -162,7 +178,7 @@ void userspace(union uml_pt_regs *regs) op = singlestepping_skas() ? PTRACE_SINGLESTEP : PTRACE_SYSCALL; - err = ptrace(op, userspace_pid, 0, 0); + err = ptrace(op, pid, 0, 0); if(err) panic("userspace - PTRACE_SYSCALL failed, " "errno = %d\n", errno); @@ -177,7 +193,7 @@ void new_thread(void *stack, void **swit *switch_buf_ptr = &switch_buf; *fork_buf_ptr = &fork_buf; - if(setjmp(fork_buf) == 0) + if(sigsetjmp(fork_buf, 1) == 0) new_thread_proc(stack, handler); remove_sigstack(); @@ -189,16 +205,16 @@ void thread_wait(void *sw, void *fb) *switch_buf = &buf; fork_buf = fb; - if(setjmp(buf) == 0) - longjmp(*fork_buf, 1); + if(sigsetjmp(buf, 1) == 0) + siglongjmp(*fork_buf, 1); } -static int move_registers(int int_op, int fp_op, union uml_pt_regs *regs, - unsigned long *fp_regs) +static int move_registers(int pid, int int_op, int fp_op, + union uml_pt_regs *regs, unsigned long *fp_regs) { - if(ptrace(int_op, userspace_pid, 0, regs->skas.regs) < 0) + if(ptrace(int_op, pid, 0, regs->skas.regs) < 0) return(-errno); - if(ptrace(fp_op, userspace_pid, 0, fp_regs) < 0) + if(ptrace(fp_op, pid, 0, fp_regs) < 0) return(-errno); return(0); } @@ -217,10 +233,11 @@ void save_registers(union uml_pt_regs *r fp_regs = regs->skas.fp; } - err = move_registers(PTRACE_GETREGS, fp_op, regs, fp_regs); + err = move_registers(userspace_pid[0], PTRACE_GETREGS, fp_op, regs, + fp_regs); if(err) panic("save_registers - saving registers failed, errno = %d\n", - err); + -err); } void restore_registers(union uml_pt_regs *regs) @@ -237,10 +254,11 @@ void restore_registers(union uml_pt_regs fp_regs = regs->skas.fp; } - err = move_registers(PTRACE_SETREGS, fp_op, regs, fp_regs); + err = move_registers(userspace_pid[0], PTRACE_SETREGS, fp_op, regs, + fp_regs); if(err) panic("restore_registers - saving registers failed, " - "errno = %d\n", err); + "errno = %d\n", -err); } void switch_threads(void *me, void *next) @@ -248,8 +266,8 @@ void switch_threads(void *me, void *next jmp_buf my_buf, **me_ptr = me, *next_buf = next; *me_ptr = &my_buf; - if(setjmp(my_buf) == 0) - longjmp(*next_buf, 1); + if(sigsetjmp(my_buf, 1) == 0) + siglongjmp(*next_buf, 1); } static jmp_buf initial_jmpbuf; @@ -265,14 +283,14 @@ int start_idle_thread(void *stack, void int n; *fork_buf_ptr = &initial_jmpbuf; - n = setjmp(initial_jmpbuf); + n = sigsetjmp(initial_jmpbuf, 1); if(n == 0) new_thread_proc((void *) stack, new_thread_handler); else if(n == 1) remove_sigstack(); else if(n == 2){ (*cb_proc)(cb_arg); - longjmp(*cb_back, 1); + siglongjmp(*cb_back, 1); } else if(n == 3){ kmalloc_ok = 0; @@ -282,7 +300,7 @@ int start_idle_thread(void *stack, void kmalloc_ok = 0; return(1); } - longjmp(**switch_buf, 1); + siglongjmp(**switch_buf, 1); } void remove_sigstack(void) @@ -304,8 +322,8 @@ void initial_thread_cb_skas(void (*proc) cb_back = &here; block_signals(); - if(setjmp(here) == 0) - longjmp(initial_jmpbuf, 2); + if(sigsetjmp(here, 1) == 0) + siglongjmp(initial_jmpbuf, 2); unblock_signals(); cb_proc = NULL; @@ -316,22 +334,23 @@ void initial_thread_cb_skas(void (*proc) void halt_skas(void) { block_signals(); - longjmp(initial_jmpbuf, 3); + siglongjmp(initial_jmpbuf, 3); } void reboot_skas(void) { block_signals(); - longjmp(initial_jmpbuf, 4); + siglongjmp(initial_jmpbuf, 4); } int new_mm(int from) { struct proc_mm_op copy; - int n, fd = os_open_file("/proc/mm", of_write(OPENFLAGS()), 0); + int n, fd = os_open_file("/proc/mm", + of_cloexec(of_write(OPENFLAGS())), 0); if(fd < 0) - return(-errno); + return(fd); if(from != -1){ copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS, @@ -340,8 +359,9 @@ int new_mm(int from) n = os_write_file(fd, ©, sizeof(copy)); if(n != sizeof(copy)) printk("new_mm : /proc/mm copy_segments failed, " - "errno = %d\n", errno); + "err = %d\n", -n); } + return(fd); } @@ -349,7 +369,8 @@ void switch_mm_skas(int mm_fd) { int err; - err = ptrace(PTRACE_SWITCH_MM, userspace_pid, 0, mm_fd); +#warning need cpu pid in switch_mm_skas + err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd); if(err) panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n", errno); @@ -357,7 +378,8 @@ void switch_mm_skas(int mm_fd) void kill_off_processes_skas(void) { - os_kill_process(userspace_pid, 1); +#warning need to loop over userspace_pids in kill_off_processes_skas + os_kill_process(userspace_pid[0], 1); } void init_registers(int pid) --- linux-2.6.2-rc1/arch/um/kernel/skas/process_kern.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/um/kernel/skas/process_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -61,11 +61,13 @@ void new_thread_handler(int sig) thread_wait(¤t->thread.mode.skas.switch_buf, current->thread.mode.skas.fork_buf); -#ifdef CONFIG_SMP - schedule_tail(NULL); -#endif + if(current->thread.prev_sched != NULL) + schedule_tail(current->thread.prev_sched); current->thread.prev_sched = NULL; + /* The return value is 1 if the kernel thread execs a process, + * 0 if it just exits + */ n = run_kernel_thread(fn, arg, ¤t->thread.exec_buf); if(n == 1) userspace(¤t->thread.regs.regs); @@ -93,9 +95,8 @@ void fork_handler(int sig) current->thread.mode.skas.fork_buf); force_flush_all(); -#ifdef CONFIG_SMP - schedule_tail(current->thread.prev_sched); -#endif + if(current->thread.prev_sched != NULL) + schedule_tail(current->thread.prev_sched); current->thread.prev_sched = NULL; unblock_signals(); @@ -136,7 +137,7 @@ int copy_thread_skas(int nr, unsigned lo void init_idle_skas(void) { - cpu_tasks[current->thread_info->cpu].pid = os_getpid(); + cpu_tasks[current_thread->cpu].pid = os_getpid(); default_idle(); } @@ -160,11 +161,11 @@ static int start_kernel_proc(void *unuse int start_uml_skas(void) { - start_userspace(); + start_userspace(0); capture_signal_stack(); + uml_idle_timer(); init_new_thread_signals(1); - idle_timer(); init_task.thread.request.u.thread.proc = start_kernel_proc; init_task.thread.request.u.thread.arg = NULL; @@ -175,12 +176,14 @@ int start_uml_skas(void) int external_pid_skas(struct task_struct *task) { - return(userspace_pid); +#warning Need to look up userspace_pid by cpu + return(userspace_pid[0]); } int thread_pid_skas(struct task_struct *task) { - return(userspace_pid); +#warning Need to look up userspace_pid by cpu + return(userspace_pid[0]); } /* --- linux-2.6.2-rc1/arch/um/kernel/skas/syscall_kern.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/um/kernel/skas/syscall_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ --- linux-2.6.2-rc1/arch/um/kernel/skas/sys-i386/sigcontext.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/kernel/skas/sys-i386/sigcontext.c 2004-01-21 23:30:29.000000000 -0800 @@ -12,10 +12,9 @@ #include "kern_util.h" #include "user.h" #include "sigcontext.h" +#include "mode.h" -extern int userspace_pid; - -int copy_sc_from_user_skas(union uml_pt_regs *regs, void *from_ptr) +int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, void *from_ptr) { struct sigcontext sc, *from = from_ptr; unsigned long fpregs[FP_FRAME_SIZE]; @@ -41,13 +40,12 @@ int copy_sc_from_user_skas(union uml_pt_ regs->skas.regs[EIP] = sc.eip; regs->skas.regs[CS] = sc.cs; regs->skas.regs[EFL] = sc.eflags; - regs->skas.regs[UESP] = sc.esp_at_signal; regs->skas.regs[SS] = sc.ss; regs->skas.fault_addr = sc.cr2; regs->skas.fault_type = FAULT_WRITE(sc.err); regs->skas.trap_type = sc.trapno; - err = ptrace(PTRACE_SETFPREGS, userspace_pid, 0, fpregs); + err = ptrace(PTRACE_SETFPREGS, pid, 0, fpregs); if(err < 0){ printk("copy_sc_to_user - PTRACE_SETFPREGS failed, " "errno = %d\n", errno); @@ -57,8 +55,9 @@ int copy_sc_from_user_skas(union uml_pt_ return(0); } -int copy_sc_to_user_skas(void *to_ptr, void *fp, union uml_pt_regs *regs, - unsigned long fault_addr, int fault_type) +int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp, + union uml_pt_regs *regs, unsigned long fault_addr, + int fault_type) { struct sigcontext sc, *to = to_ptr; struct _fpstate *to_fp; @@ -86,7 +85,7 @@ int copy_sc_to_user_skas(void *to_ptr, v sc.err = TO_SC_ERR(fault_type); sc.trapno = regs->skas.trap_type; - err = ptrace(PTRACE_GETFPREGS, userspace_pid, 0, fpregs); + err = ptrace(PTRACE_GETFPREGS, pid, 0, fpregs); if(err < 0){ printk("copy_sc_to_user - PTRACE_GETFPREGS failed, " "errno = %d\n", errno); --- linux-2.6.2-rc1/arch/um/kernel/skas/trap_user.c 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/um/kernel/skas/trap_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -41,8 +41,6 @@ void user_signal(int sig, union uml_pt_r { struct signal_info *info; - if(sig == SIGVTALRM) - missed_ticks[cpu()]++; regs->skas.is_user = 1; regs->skas.fault_addr = 0; regs->skas.fault_type = 0; --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/kernel/skas/uaccess.c 2004-01-21 23:30:29.000000000 -0800 @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/stddef.h" +#include "linux/kernel.h" +#include "linux/string.h" +#include "linux/fs.h" +#include "linux/highmem.h" +#include "asm/page.h" +#include "asm/pgtable.h" +#include "asm/uaccess.h" +#include "kern_util.h" + +extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr, + pte_t *pte_out); + +static unsigned long maybe_map(unsigned long virt, int is_write) +{ + pte_t pte; + int err; + + void *phys = um_virt_to_phys(current, virt, &pte); + int dummy_code; + + if(IS_ERR(phys) || (is_write && !pte_write(pte))){ + err = handle_page_fault(virt, 0, is_write, 0, &dummy_code); + if(err) + return(0); + phys = um_virt_to_phys(current, virt, NULL); + } + return((unsigned long) phys); +} + +static int do_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long addr, int len, void *arg), void *arg) +{ + struct page *page; + int n; + + addr = maybe_map(addr, is_write); + if(addr == -1) + return(-1); + + page = phys_to_page(addr); + addr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); + n = (*op)(addr, len, arg); + kunmap(page); + + return(n); +} + +static int buffer_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long addr, int len, void *arg), + void *arg) +{ + int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); + int remain = len, n; + + n = do_op(addr, size, is_write, op, arg); + if(n != 0) + return(n < 0 ? remain : 0); + + addr += size; + remain -= size; + if(remain == 0) + return(0); + + while(addr < ((addr + remain) & PAGE_MASK)){ + n = do_op(addr, PAGE_SIZE, is_write, op, arg); + if(n != 0) + return(n < 0 ? remain : 0); + + addr += PAGE_SIZE; + remain -= PAGE_SIZE; + } + if(remain == 0) + return(0); + + n = do_op(addr, remain, is_write, op, arg); + if(n != 0) + return(n < 0 ? remain : 0); + return(0); +} + +static int copy_chunk_from_user(unsigned long from, int len, void *arg) +{ + unsigned long *to_ptr = arg, to = *to_ptr; + + memcpy((void *) to, (void *) from, len); + *to_ptr += len; + return(0); +} + +int copy_from_user_skas(void *to, const void *from, int n) +{ + if(segment_eq(get_fs(), KERNEL_DS)){ + memcpy(to, from, n); + return(0); + } + + return(access_ok_skas(VERIFY_READ, from, n) ? + buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): + n); +} + +static int copy_chunk_to_user(unsigned long to, int len, void *arg) +{ + unsigned long *from_ptr = arg, from = *from_ptr; + + memcpy((void *) to, (void *) from, len); + *from_ptr += len; + return(0); +} + +int copy_to_user_skas(void *to, const void *from, int n) +{ + if(segment_eq(get_fs(), KERNEL_DS)){ + memcpy(to, from, n); + return(0); + } + + return(access_ok_skas(VERIFY_WRITE, to, n) ? + buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : + n); +} + +static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) +{ + char **to_ptr = arg, *to = *to_ptr; + int n; + + strncpy(to, (void *) from, len); + n = strnlen(to, len); + *to_ptr += n; + + if(n < len) + return(1); + return(0); +} + +int strncpy_from_user_skas(char *dst, const char *src, int count) +{ + int n; + char *ptr = dst; + + if(segment_eq(get_fs(), KERNEL_DS)){ + strncpy(dst, src, count); + return(strnlen(dst, count)); + } + + if(!access_ok_skas(VERIFY_READ, src, 1)) + return(-EFAULT); + + n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, + &ptr); + if(n != 0) + return(-EFAULT); + return(strnlen(dst, count)); +} + +static int clear_chunk(unsigned long addr, int len, void *unused) +{ + memset((void *) addr, 0, len); + return(0); +} + +int __clear_user_skas(void *mem, int len) +{ + return(buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL)); +} + +int clear_user_skas(void *mem, int len) +{ + if(segment_eq(get_fs(), KERNEL_DS)){ + memset(mem, 0, len); + return(0); + } + + return(access_ok_skas(VERIFY_WRITE, mem, len) ? + buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len); +} + +static int strnlen_chunk(unsigned long str, int len, void *arg) +{ + int *len_ptr = arg, n; + + n = strnlen((void *) str, len); + *len_ptr += n; + + if(n < len) + return(1); + return(0); +} + +int strnlen_user_skas(const void *str, int len) +{ + int count = 0, n; + + if(segment_eq(get_fs(), KERNEL_DS)) + return(strnlen(str, len) + 1); + + n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count); + if(n == 0) + return(count + 1); + return(-EFAULT); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.2-rc1/arch/um/kernel/skas/util/Makefile 2003-06-14 12:18:30.000000000 -0700 +++ 25/arch/um/kernel/skas/util/Makefile 2004-01-21 23:30:29.000000000 -0800 @@ -1,10 +1,10 @@ all: mk_ptregs mk_ptregs : mk_ptregs.o - $(CC) -o mk_ptregs mk_ptregs.o + $(HOSTCC) -o mk_ptregs mk_ptregs.o mk_ptregs.o : mk_ptregs.c - $(CC) -c $< + $(HOSTCC) -c $< clean : $(RM) -f mk_ptregs *.o *~ --- linux-2.6.2-rc1/arch/um/kernel/skas/util/mk_ptregs.c 2003-06-14 12:18:08.000000000 -0700 +++ 25/arch/um/kernel/skas/util/mk_ptregs.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,3 +1,4 @@ +#include #include #include --- linux-2.6.2-rc1/arch/um/kernel/smp.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/smp.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,9 +1,15 @@ /* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ #include "linux/config.h" +#include "linux/percpu.h" +#include "asm/pgalloc.h" +#include "asm/tlb.h" + +/* For some reason, mmu_gathers are referenced when CONFIG_SMP is off. */ +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); #ifdef CONFIG_SMP @@ -23,7 +29,7 @@ #include "os.h" /* CPU online map, set by smp_boot_cpus */ -unsigned long cpu_online_map = cpumask_of_cpu(0); +unsigned long cpu_online_map = CPU_MASK_NONE; EXPORT_SYMBOL(cpu_online_map); @@ -55,7 +61,7 @@ struct task_struct *idle_threads[NR_CPUS void smp_send_reschedule(int cpu) { - write(cpu_data[cpu].ipi_pipe[1], "R", 1); + os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1); num_reschedules_sent++; } @@ -100,35 +106,34 @@ void smp_send_stop(void) printk(KERN_INFO "Stopping all CPUs..."); for(i = 0; i < num_online_cpus(); i++){ - if(i == current->thread_info->cpu) + if(i == current_thread->cpu) continue; - write(cpu_data[i].ipi_pipe[1], "S", 1); + os_write_file(cpu_data[i].ipi_pipe[1], "S", 1); } printk("done\n"); } -static cpumask_t smp_commenced_mask; -static cpumask_t smp_callin_map = CPU_MASK_NONE; +static cpumask_t smp_commenced_mask = CPU_MASK_NONE; +static cpumask_t cpu_callin_map = CPU_MASK_NONE; static int idle_proc(void *cpup) { int cpu = (int) cpup, err; err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1); - if(err) - panic("CPU#%d failed to create IPI pipe, errno = %d", cpu, - -err); + if(err < 0) + panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err); activate_ipi(cpu_data[cpu].ipi_pipe[0], current->thread.mode.tt.extern_pid); wmb(); - if (cpu_test_and_set(cpu, &smp_callin_map)) { + if (cpu_test_and_set(cpu, cpu_callin_map)) { printk("huh, CPU#%d already present??\n", cpu); BUG(); } - while (!cpu_isset(cpu, &smp_commenced_mask)) + while (!cpu_isset(cpu, smp_commenced_mask)) cpu_relax(); cpu_set(cpu, cpu_online_map); @@ -143,16 +148,20 @@ static struct task_struct *idle_thread(i current->thread.request.u.thread.proc = idle_proc; current->thread.request.u.thread.arg = (void *) cpu; - new_task = do_fork(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL, NULL); - if(IS_ERR(new_task)) panic("do_fork failed in idle_thread"); + new_task = copy_process(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL, + NULL); + if(IS_ERR(new_task)) + panic("copy_process failed in idle_thread, error = %ld", + PTR_ERR(new_task)); cpu_tasks[cpu] = ((struct cpu_task) { .pid = new_task->thread.mode.tt.extern_pid, .task = new_task } ); idle_threads[cpu] = new_task; - CHOOSE_MODE(write(new_task->thread.mode.tt.switch_pipe[1], &c, + CHOOSE_MODE(os_write_file(new_task->thread.mode.tt.switch_pipe[1], &c, sizeof(c)), ({ panic("skas mode doesn't support SMP"); })); + wake_up_forked_process(new_task); return(new_task); } @@ -160,15 +169,17 @@ void smp_prepare_cpus(unsigned int maxcp { struct task_struct *idle; unsigned long waittime; - int err, cpu; + int err, cpu, me = smp_processor_id(); - cpu_set(0, cpu_online_map); - cpu_set(0, smp_callin_map); + cpu_clear(me, cpu_online_map); + cpu_set(me, cpu_online_map); + cpu_set(me, cpu_callin_map); - err = os_pipe(cpu_data[0].ipi_pipe, 1, 1); - if(err) panic("CPU#0 failed to create IPI pipe, errno = %d", -err); + err = os_pipe(cpu_data[me].ipi_pipe, 1, 1); + if(err < 0) + panic("CPU#0 failed to create IPI pipe, errno = %d", -err); - activate_ipi(cpu_data[0].ipi_pipe[0], + activate_ipi(cpu_data[me].ipi_pipe[0], current->thread.mode.tt.extern_pid); for(cpu = 1; cpu < ncpus; cpu++){ @@ -180,10 +191,10 @@ void smp_prepare_cpus(unsigned int maxcp unhash_process(idle); waittime = 200000000; - while (waittime-- && !cpu_isset(cpu, smp_callin_map)) + while (waittime-- && !cpu_isset(cpu, cpu_callin_map)) cpu_relax(); - if (cpu_isset(cpu, smp_callin_map)) + if (cpu_isset(cpu, cpu_callin_map)) printk("done\n"); else printk("failed\n"); } @@ -216,7 +227,7 @@ void IPI_handler(int cpu) int fd; fd = cpu_data[cpu].ipi_pipe[0]; - while (read(fd, &c, 1) == 1) { + while (os_read_file(fd, &c, 1) == 1) { switch (c) { case 'C': smp_call_function_slave(cpu); @@ -273,9 +284,9 @@ int smp_call_function(void (*_func)(void info = _info; for (i=0;ithread_info->cpu) && + if((i != current_thread->cpu) && cpu_isset(i, cpu_online_map)) - write(cpu_data[i].ipi_pipe[1], "C", 1); + os_write_file(cpu_data[i].ipi_pipe[1], "C", 1); while (atomic_read(&scf_started) != cpus) barrier(); --- linux-2.6.2-rc1/arch/um/kernel/syscall_kern.c 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/kernel/syscall_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -35,39 +35,40 @@ long um_mount(char * dev_name, char * di long sys_fork(void) { - struct task_struct *p; + long ret; current->thread.forking = 1; - p = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL); + ret = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL); current->thread.forking = 0; - return(IS_ERR(p) ? PTR_ERR(p) : p->pid); + return(ret); } -long sys_clone(unsigned long clone_flags, unsigned long newsp) +long sys_clone(unsigned long clone_flags, unsigned long newsp, + int *parent_tid, int *child_tid) { - struct task_struct *p; + long ret; current->thread.forking = 1; - p = do_fork(clone_flags, newsp, NULL, 0, NULL, NULL); + ret = do_fork(clone_flags, newsp, NULL, 0, parent_tid, child_tid); current->thread.forking = 0; - return(IS_ERR(p) ? PTR_ERR(p) : p->pid); + return(ret); } long sys_vfork(void) { - struct task_struct *p; + long ret; current->thread.forking = 1; - p = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, NULL); + ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, + NULL); current->thread.forking = 0; - return(IS_ERR(p) ? PTR_ERR(p) : p->pid); + return(ret); } /* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) +long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, unsigned long fd, + unsigned long pgoff) { int error = -EBADF; struct file * file = NULL; @@ -79,9 +80,9 @@ static inline long do_mmap2( goto out; } - down_write(¤t->mm->mmap_sem); + down_write(&mm->mmap_sem); error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); + up_write(&mm->mmap_sem); if (file) fput(file); @@ -93,7 +94,7 @@ long sys_mmap2(unsigned long addr, unsig unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { - return do_mmap2(addr, len, prot, flags, fd, pgoff); + return do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff); } /* @@ -120,7 +121,8 @@ int old_mmap(unsigned long addr, unsigne if (offset & ~PAGE_MASK) goto out; - err = do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); + err = do_mmap2(current->mm, addr, len, prot, flags, fd, + offset >> PAGE_SHIFT); out: return err; } @@ -135,43 +137,12 @@ int sys_pipe(unsigned long * fildes) error = do_pipe(fd); if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) + if (copy_to_user(fildes, fd, sizeof(fd))) error = -EFAULT; } return error; } -int sys_sigaction(int sig, const struct old_sigaction *act, - struct old_sigaction *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if (act) { - old_sigset_t mask; - if (verify_area(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) - return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) - return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - } - - return ret; -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. * @@ -253,7 +224,7 @@ int sys_ipc (uint call, int first, int s return sys_shmctl (first, second, (struct shmid_ds *) ptr); default: - return -EINVAL; + return -ENOSYS; } } @@ -302,11 +273,6 @@ int sys_olduname(struct oldold_utsname * return error; } -int sys_sigaltstack(const stack_t *uss, stack_t *uoss) -{ - return(do_sigaltstack(uss, uoss, PT_REGS_SP(¤t->thread.regs))); -} - long execute_syscall(void *r) { return(CHOOSE_MODE_PROC(execute_syscall_tt, execute_syscall_skas, r)); --- linux-2.6.2-rc1/arch/um/kernel/sys_call_table.c 2003-06-14 12:18:30.000000000 -0700 +++ 25/arch/um/kernel/sys_call_table.c 2004-01-21 23:30:29.000000000 -0800 @@ -5,7 +5,6 @@ #include "linux/config.h" #include "linux/unistd.h" -#include "linux/version.h" #include "linux/sys.h" #include "linux/swap.h" #include "linux/sysctl.h" @@ -219,15 +218,30 @@ extern syscall_handler_t sys_getdents64; extern syscall_handler_t sys_gettid; extern syscall_handler_t sys_readahead; extern syscall_handler_t sys_tkill; +extern syscall_handler_t sys_setxattr; +extern syscall_handler_t sys_lsetxattr; +extern syscall_handler_t sys_fsetxattr; +extern syscall_handler_t sys_getxattr; +extern syscall_handler_t sys_lgetxattr; +extern syscall_handler_t sys_fgetxattr; +extern syscall_handler_t sys_listxattr; +extern syscall_handler_t sys_llistxattr; +extern syscall_handler_t sys_flistxattr; +extern syscall_handler_t sys_removexattr; +extern syscall_handler_t sys_lremovexattr; +extern syscall_handler_t sys_fremovexattr; extern syscall_handler_t sys_sendfile64; extern syscall_handler_t sys_futex; extern syscall_handler_t sys_sched_setaffinity; extern syscall_handler_t sys_sched_getaffinity; +extern syscall_handler_t sys_set_thread_area; +extern syscall_handler_t sys_get_thread_area; extern syscall_handler_t sys_io_setup; extern syscall_handler_t sys_io_destroy; extern syscall_handler_t sys_io_getevents; extern syscall_handler_t sys_io_submit; extern syscall_handler_t sys_io_cancel; +extern syscall_handler_t sys_fadvise64; extern syscall_handler_t sys_exit_group; extern syscall_handler_t sys_lookup_dcookie; extern syscall_handler_t sys_epoll_create; @@ -235,6 +249,20 @@ extern syscall_handler_t sys_epoll_ctl; extern syscall_handler_t sys_epoll_wait; extern syscall_handler_t sys_remap_file_pages; extern syscall_handler_t sys_set_tid_address; +extern syscall_handler_t sys_timer_create; +extern syscall_handler_t sys_timer_settime; +extern syscall_handler_t sys_timer_gettime; +extern syscall_handler_t sys_timer_getoverrun; +extern syscall_handler_t sys_timer_delete; +extern syscall_handler_t sys_clock_settime; +extern syscall_handler_t sys_clock_gettime; +extern syscall_handler_t sys_clock_getres; +extern syscall_handler_t sys_clock_nanosleep; +extern syscall_handler_t sys_statfs64; +extern syscall_handler_t sys_fstatfs64; +extern syscall_handler_t sys_tgkill; +extern syscall_handler_t sys_utimes; +extern syscall_handler_t sys_fadvise64_64; #ifdef CONFIG_NFSD #define NFSSERVCTL sys_nfsservctl @@ -246,7 +274,7 @@ extern syscall_handler_t um_mount; extern syscall_handler_t um_time; extern syscall_handler_t um_stime; -#define LAST_GENERIC_SYSCALL __NR_set_tid_address +#define LAST_GENERIC_SYSCALL __NR_vserver #if LAST_GENERIC_SYSCALL > LAST_ARCH_SYSCALL #define LAST_SYSCALL LAST_GENERIC_SYSCALL @@ -455,32 +483,37 @@ syscall_handler_t *sys_call_table[] = { [ __NR_stat64 ] = sys_stat64, [ __NR_lstat64 ] = sys_lstat64, [ __NR_fstat64 ] = sys_fstat64, - [ __NR_fcntl64 ] = sys_fcntl64, [ __NR_getdents64 ] = sys_getdents64, + [ __NR_fcntl64 ] = sys_fcntl64, + [ 223 ] = sys_ni_syscall, [ __NR_gettid ] = sys_gettid, [ __NR_readahead ] = sys_readahead, - [ __NR_setxattr ] = sys_ni_syscall, - [ __NR_lsetxattr ] = sys_ni_syscall, - [ __NR_fsetxattr ] = sys_ni_syscall, - [ __NR_getxattr ] = sys_ni_syscall, - [ __NR_lgetxattr ] = sys_ni_syscall, - [ __NR_fgetxattr ] = sys_ni_syscall, - [ __NR_listxattr ] = sys_ni_syscall, - [ __NR_llistxattr ] = sys_ni_syscall, - [ __NR_flistxattr ] = sys_ni_syscall, - [ __NR_removexattr ] = sys_ni_syscall, - [ __NR_lremovexattr ] = sys_ni_syscall, - [ __NR_fremovexattr ] = sys_ni_syscall, + [ __NR_setxattr ] = sys_setxattr, + [ __NR_lsetxattr ] = sys_lsetxattr, + [ __NR_fsetxattr ] = sys_fsetxattr, + [ __NR_getxattr ] = sys_getxattr, + [ __NR_lgetxattr ] = sys_lgetxattr, + [ __NR_fgetxattr ] = sys_fgetxattr, + [ __NR_listxattr ] = sys_listxattr, + [ __NR_llistxattr ] = sys_llistxattr, + [ __NR_flistxattr ] = sys_flistxattr, + [ __NR_removexattr ] = sys_removexattr, + [ __NR_lremovexattr ] = sys_lremovexattr, + [ __NR_fremovexattr ] = sys_fremovexattr, [ __NR_tkill ] = sys_tkill, [ __NR_sendfile64 ] = sys_sendfile64, [ __NR_futex ] = sys_futex, [ __NR_sched_setaffinity ] = sys_sched_setaffinity, [ __NR_sched_getaffinity ] = sys_sched_getaffinity, + [ __NR_set_thread_area ] = sys_ni_syscall, + [ __NR_get_thread_area ] = sys_ni_syscall, [ __NR_io_setup ] = sys_io_setup, [ __NR_io_destroy ] = sys_io_destroy, [ __NR_io_getevents ] = sys_io_getevents, [ __NR_io_submit ] = sys_io_submit, [ __NR_io_cancel ] = sys_io_cancel, + [ __NR_fadvise64 ] = sys_fadvise64, + [ 251 ] = sys_ni_syscall, [ __NR_exit_group ] = sys_exit_group, [ __NR_lookup_dcookie ] = sys_lookup_dcookie, [ __NR_epoll_create ] = sys_epoll_create, @@ -488,6 +521,21 @@ syscall_handler_t *sys_call_table[] = { [ __NR_epoll_wait ] = sys_epoll_wait, [ __NR_remap_file_pages ] = sys_remap_file_pages, [ __NR_set_tid_address ] = sys_set_tid_address, + [ __NR_timer_create ] = sys_timer_create, + [ __NR_timer_settime ] = sys_timer_settime, + [ __NR_timer_gettime ] = sys_timer_gettime, + [ __NR_timer_getoverrun ] = sys_timer_getoverrun, + [ __NR_timer_delete ] = sys_timer_delete, + [ __NR_clock_settime ] = sys_clock_settime, + [ __NR_clock_gettime ] = sys_clock_gettime, + [ __NR_clock_getres ] = sys_clock_getres, + [ __NR_clock_nanosleep ] = sys_clock_nanosleep, + [ __NR_statfs64 ] = sys_statfs64, + [ __NR_fstatfs64 ] = sys_fstatfs64, + [ __NR_tgkill ] = sys_tgkill, + [ __NR_utimes ] = sys_utimes, + [ __NR_fadvise64_64 ] = sys_fadvise64_64, + [ __NR_vserver ] = sys_ni_syscall, ARCH_SYSCALLS [ LAST_SYSCALL + 1 ... NR_syscalls ] = --- linux-2.6.2-rc1/arch/um/kernel/sysrq.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/sysrq.c 2004-01-21 23:30:29.000000000 -0800 @@ -55,6 +55,14 @@ void show_trace_task(struct task_struct show_trace((unsigned long *)esp); } +void show_stack(struct task_struct *task, unsigned long *sp) +{ + if(task) + show_trace_task(task); + else + show_trace(sp); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- linux-2.6.2-rc1/arch/um/kernel/tempfile.c 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/um/kernel/tempfile.c 2004-01-21 23:30:29.000000000 -0800 @@ -28,6 +28,7 @@ static void __init find_tempdir(void) } if((dir == NULL) || (*dir == '\0')) dir = "/tmp"; + tempdir = malloc(strlen(dir) + 2); if(tempdir == NULL){ fprintf(stderr, "Failed to malloc tempdir, " @@ -49,7 +50,8 @@ int make_tempfile(const char *template, else *tempname = 0; strcat(tempname, template); - if((fd = mkstemp(tempname)) < 0){ + fd = mkstemp(tempname); + if(fd < 0){ fprintf(stderr, "open - cannot create %s: %s\n", tempname, strerror(errno)); return -1; @@ -59,7 +61,8 @@ int make_tempfile(const char *template, return -1; } if(out_tempname){ - if((*out_tempname = strdup(tempname)) == NULL){ + *out_tempname = strdup(tempname); + if(*out_tempname == NULL){ perror("strdup"); return -1; } --- linux-2.6.2-rc1/arch/um/kernel/time.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/time.c 2004-01-21 23:30:29.000000000 -0800 @@ -4,24 +4,33 @@ */ #include +#include #include #include #include #include #include -#include "linux/module.h" #include "user_util.h" #include "kern_util.h" #include "user.h" #include "process.h" #include "signal_user.h" #include "time_user.h" +#include "kern_constants.h" + +/* XXX This really needs to be declared and initialized in a kernel file since + * it's in + */ +extern struct timespec wall_to_monotonic; extern struct timeval xtime; +struct timeval local_offset = { 0, 0 }; + void timer(void) { gettimeofday(&xtime, NULL); + timeradd(&xtime, &local_offset, &xtime); } void set_interval(int timer_type) @@ -66,7 +75,7 @@ void switch_timers(int to_real) errno); } -void idle_timer(void) +void uml_idle_timer(void) { if(signal(SIGVTALRM, SIG_IGN) == SIG_ERR) panic("Couldn't unset SIGVTALRM handler"); @@ -76,14 +85,54 @@ void idle_timer(void) set_interval(ITIMER_REAL); } +static unsigned long long get_host_hz(void) +{ + char mhzline[16], *end; + int ret, mult, mhz, rest, len; + + ret = cpu_feature("cpu MHz", mhzline, + sizeof(mhzline) / sizeof(mhzline[0])); + if(!ret) + panic ("Could not get host MHZ"); + + mhz = strtoul(mhzline, &end, 10); + + /* This business is to parse a floating point number without using + * floating types. + */ + + rest = 0; + mult = 0; + if(*end == '.'){ + end++; + len = strlen(end); + if(len < 6) + mult = 6 - len; + else if(len > 6) + end[6] = '\0'; + rest = strtoul(end, NULL, 10); + while(mult-- > 0) + rest *= 10; + } + + return(1000000 * mhz + rest); +} + +unsigned long long host_hz = 0; + void time_init(void) { + struct timespec now; + + host_hz = get_host_hz(); if(signal(SIGVTALRM, boot_timer_handler) == SIG_ERR) panic("Couldn't set SIGVTALRM handler"); set_interval(ITIMER_VIRTUAL); -} -struct timeval local_offset = { 0, 0 }; + do_posix_clock_monotonic_gettime(&now); + wall_to_monotonic.tv_sec = -now.tv_sec; + wall_to_monotonic.tv_nsec = -now.tv_nsec; +} void do_gettimeofday(struct timeval *tv) { @@ -95,15 +144,13 @@ void do_gettimeofday(struct timeval *tv) time_unlock(flags); } -EXPORT_SYMBOL(do_gettimeofday); - int do_settimeofday(struct timespec *tv) { struct timeval now; unsigned long flags; struct timeval tv_in; - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) + if ((unsigned long) tv->tv_nsec >= UM_NSEC_PER_SEC) return -EINVAL; tv_in.tv_sec = tv->tv_sec; @@ -113,9 +160,9 @@ int do_settimeofday(struct timespec *tv) gettimeofday(&now, NULL); timersub(&tv_in, &now, &local_offset); time_unlock(flags); -} -EXPORT_SYMBOL(do_settimeofday); + return(0); +} void idle_sleep(int secs) { --- linux-2.6.2-rc1/arch/um/kernel/time_kern.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/um/kernel/time_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -30,6 +30,14 @@ int hz(void) return(HZ); } +/* + * Scheduler clock - returns current time in nanosec units. + */ +unsigned long long sched_clock(void) +{ + return (unsigned long long)jiffies_64 * (1000000000 / HZ); +} + /* Changed at early boot */ int timer_irq_inited = 0; @@ -39,13 +47,47 @@ int timer_irq_inited = 0; */ int __attribute__ ((__section__ (".unprotected"))) missed_ticks[NR_CPUS]; +static int first_tick; +static unsigned long long prev_tsc; +static long long delta; /* Deviation per interval */ + +extern unsigned long long host_hz; + void timer_irq(union uml_pt_regs *regs) { - int cpu = current->thread_info->cpu, ticks = missed_ticks[cpu]; + unsigned long long ticks = 0; + + if(!timer_irq_inited){ + /* This is to ensure that ticks don't pile up when + * the timer handler is suspended */ + first_tick = 0; + return; + } + + if(first_tick){ +#if defined(CONFIG_UML_REAL_TIME_CLOCK) + unsigned long long tsc; + /* We've had 1 tick */ + tsc = time_stamp(); + + delta += tsc - prev_tsc; + prev_tsc = tsc; + + ticks += (delta * HZ) / host_hz; + delta -= (ticks * host_hz) / HZ; +#else + ticks = 1; +#endif + } + else { + prev_tsc = time_stamp(); + first_tick = 1; + } - if(!timer_irq_inited) return; - missed_ticks[cpu] = 0; - while(ticks--) do_IRQ(TIMER_IRQ, regs); + while(ticks > 0){ + do_IRQ(TIMER_IRQ, regs); + ticks--; + } } void boot_timer_handler(int sig) @@ -58,12 +100,13 @@ void boot_timer_handler(int sig) do_timer(®s); } -void um_timer(int irq, void *dev, struct pt_regs *regs) +irqreturn_t um_timer(int irq, void *dev, struct pt_regs *regs) { do_timer(regs); - write_seqlock(&xtime_lock); + write_seqlock_irq(&xtime_lock); timer(); - write_sequnlock(&xtime_lock); + write_sequnlock_irq(&xtime_lock); + return(IRQ_HANDLED); } long um_time(int * tloc) @@ -81,12 +124,12 @@ long um_time(int * tloc) long um_stime(int * tptr) { int value; - struct timeval new; + struct timespec new; if (get_user(value, tptr)) return -EFAULT; new.tv_sec = value; - new.tv_usec = 0; + new.tv_nsec = 0; do_settimeofday(&new); return 0; } @@ -125,9 +168,11 @@ void __const_udelay(um_udelay_t usecs) void timer_handler(int sig, union uml_pt_regs *regs) { #ifdef CONFIG_SMP + local_irq_disable(); update_process_times(user_context(UPT_SP(regs))); + local_irq_enable(); #endif - if(current->thread_info->cpu == 0) + if(current_thread->cpu == 0) timer_irq(regs); } @@ -136,6 +181,7 @@ static spinlock_t timer_spinlock = SPIN_ unsigned long time_lock(void) { unsigned long flags; + spin_lock_irqsave(&timer_spinlock, flags); return(flags); } @@ -150,8 +196,8 @@ int __init timer_init(void) int err; CHOOSE_MODE(user_time_init_tt(), user_time_init_skas()); - if((err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", - NULL)) != 0) + err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", NULL); + if(err != 0) printk(KERN_ERR "timer_init : request_irq failed - " "errno = %d\n", -err); timer_irq_inited = 1; @@ -160,7 +206,6 @@ int __init timer_init(void) __initcall(timer_init); - /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- linux-2.6.2-rc1/arch/um/kernel/trap_kern.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/kernel/trap_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -16,12 +16,15 @@ #include "asm/tlbflush.h" #include "asm/a.out.h" #include "asm/current.h" +#include "asm/irq.h" #include "user_util.h" #include "kern_util.h" #include "kern.h" #include "chan_kern.h" #include "mconsole_kern.h" #include "2_5compat.h" +#include "mem.h" +#include "mem_kern.h" int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out) @@ -51,12 +54,12 @@ int handle_page_fault(unsigned long addr if(is_write && !(vma->vm_flags & VM_WRITE)) goto out; page = address & PAGE_MASK; - if(page == (unsigned long) current->thread_info + PAGE_SIZE) + if(page == (unsigned long) current_thread + PAGE_SIZE) panic("Kernel stack overflow"); pgd = pgd_offset(mm, page); pmd = pmd_offset(pgd, page); - survive: do { + survive: switch (handle_mm_fault(mm, vma, address, is_write)){ case VM_FAULT_MINOR: current->min_flt++; @@ -71,14 +74,20 @@ int handle_page_fault(unsigned long addr err = -ENOMEM; goto out_of_memory; default: - BUG(); + if (current->pid == 1) { + up_read(&mm->mmap_sem); + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + goto out; } pte = pte_offset_kernel(pmd, page); } while(!pte_present(*pte)); + err = 0; *pte = pte_mkyoung(*pte); if(pte_write(*pte)) *pte = pte_mkdirty(*pte); flush_tlb_page(vma, page); - err = 0; out: up_read(&mm->mmap_sem); return(err); @@ -98,6 +107,33 @@ out_of_memory: goto out; } +LIST_HEAD(physmem_remappers); + +void register_remapper(struct remapper *info) +{ + list_add(&info->list, &physmem_remappers); +} + +static int check_remapped_addr(unsigned long address, int is_write) +{ + struct remapper *remapper; + struct list_head *ele; + __u64 offset; + int fd; + + fd = phys_mapping(__pa(address), &offset); + if(fd == -1) + return(0); + + list_for_each(ele, &physmem_remappers){ + remapper = list_entry(ele, struct remapper, list); + if((*remapper->proc)(fd, address, is_write, offset)) + return(1); + } + + return(0); +} + unsigned long segv(unsigned long address, unsigned long ip, int is_write, int is_user, void *sc) { @@ -109,7 +145,9 @@ unsigned long segv(unsigned long address flush_tlb_kernel_vm(); return(0); } - if(current->mm == NULL) + else if(check_remapped_addr(address & PAGE_MASK, is_write)) + return(0); + else if(current->mm == NULL) panic("Segfault with no mm"); err = handle_page_fault(address, ip, is_write, is_user, &si.si_code); @@ -120,9 +158,8 @@ unsigned long segv(unsigned long address current->thread.fault_addr = (void *) address; do_longjmp(catcher, 1); } - else if(current->thread.fault_addr != NULL){ + else if(current->thread.fault_addr != NULL) panic("fault_addr set but no fault catcher"); - } else if(arch_fixup(ip, sc)) return(0); @@ -155,8 +192,6 @@ void bad_segv(unsigned long address, uns { struct siginfo si; - printk(KERN_ERR "Unfixable SEGV in '%s' (pid %d) at 0x%lx " - "(ip 0x%lx)\n", current->comm, current->pid, address, ip); si.si_signo = SIGSEGV; si.si_code = SEGV_ACCERR; si.si_addr = (void *) address; @@ -180,6 +215,11 @@ void bus_handler(int sig, union uml_pt_r else relay_signal(sig, regs); } +void winch(int sig, union uml_pt_regs *regs) +{ + do_IRQ(WINCH_IRQ, regs); +} + void trap_init(void) { } --- linux-2.6.2-rc1/arch/um/kernel/trap_user.c 2003-06-14 12:18:09.000000000 -0700 +++ 25/arch/um/kernel/trap_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -5,11 +5,9 @@ #include #include -#include #include #include #include -#include #include #include #include @@ -82,6 +80,8 @@ struct signal_info sig_info[] = { .is_irq = 0 }, [ SIGILL ] { .handler = relay_signal, .is_irq = 0 }, + [ SIGWINCH ] { .handler = winch, + .is_irq = 1 }, [ SIGBUS ] { .handler = bus_handler, .is_irq = 0 }, [ SIGSEGV] { .handler = segv_handler, @@ -123,7 +123,7 @@ void do_longjmp(void *b, int val) { jmp_buf *buf = b; - longjmp(*buf, val); + siglongjmp(*buf, val); } /* --- linux-2.6.2-rc1/arch/um/kernel/tt/exec_kern.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/kernel/tt/exec_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -17,6 +17,7 @@ #include "mem_user.h" #include "os.h" #include "tlb.h" +#include "mode.h" static int exec_tramp(void *sig_stack) { @@ -47,17 +48,17 @@ void flush_thread_tt(void) do_exit(SIGKILL); } - if(current->thread_info->cpu == 0) + if(current_thread->cpu == 0) forward_interrupts(new_pid); current->thread.request.op = OP_EXEC; current->thread.request.u.exec.pid = new_pid; - unprotect_stack((unsigned long) current->thread_info); + unprotect_stack((unsigned long) current_thread); os_usr1_process(os_getpid()); enable_timer(); free_page(stack); protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 1, 0, 1); - task_protections((unsigned long) current->thread_info); + task_protections((unsigned long) current_thread); force_flush_all(); unblock_signals(); } --- linux-2.6.2-rc1/arch/um/kernel/tt/include/mode.h 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/um/kernel/tt/include/mode.h 2004-01-21 23:30:29.000000000 -0800 @@ -8,6 +8,8 @@ #include "sysdep/ptrace.h" +enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; + extern int tracing_pid; extern int tracer(int (*init_proc)(void *), void *sp); --- linux-2.6.2-rc1/arch/um/kernel/tt/include/uaccess.h 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/um/kernel/tt/include/uaccess.h 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -43,65 +43,19 @@ extern unsigned long get_fault_addr(void extern int __do_copy_from_user(void *to, const void *from, int n, void **fault_addr, void **fault_catcher); - -static inline int copy_from_user_tt(void *to, const void *from, int n) -{ - return(access_ok_tt(VERIFY_READ, from, n) ? - __do_copy_from_user(to, from, n, - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher) : n); -} - -static inline int copy_to_user_tt(void *to, const void *from, int n) -{ - return(access_ok_tt(VERIFY_WRITE, to, n) ? - __do_copy_to_user(to, from, n, - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher) : n); -} - extern int __do_strncpy_from_user(char *dst, const char *src, size_t n, void **fault_addr, void **fault_catcher); - -static inline int strncpy_from_user_tt(char *dst, const char *src, int count) -{ - int n; - - if(!access_ok_tt(VERIFY_READ, src, 1)) return(-EFAULT); - n = __do_strncpy_from_user(dst, src, count, - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher); - if(n < 0) return(-EFAULT); - return(n); -} - extern int __do_clear_user(void *mem, size_t len, void **fault_addr, void **fault_catcher); - -static inline int __clear_user_tt(void *mem, int len) -{ - return(__do_clear_user(mem, len, - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher)); -} - -static inline int clear_user_tt(void *mem, int len) -{ - return(access_ok_tt(VERIFY_WRITE, mem, len) ? - __do_clear_user(mem, len, - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher) : len); -} - extern int __do_strnlen_user(const char *str, unsigned long n, void **fault_addr, void **fault_catcher); -static inline int strnlen_user_tt(const void *str, int len) -{ - return(__do_strnlen_user(str, len, - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher)); -} +extern int copy_from_user_tt(void *to, const void *from, int n); +extern int copy_to_user_tt(void *to, const void *from, int n); +extern int strncpy_from_user_tt(char *dst, const char *src, int count); +extern int __clear_user_tt(void *mem, int len); +extern int clear_user_tt(void *mem, int len); +extern int strnlen_user_tt(const void *str, int len); #endif --- linux-2.6.2-rc1/arch/um/kernel/tt/Makefile 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/um/kernel/tt/Makefile 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,5 @@ # -# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) +# Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) # Licensed under the GPL # @@ -7,7 +7,7 @@ extra-y := unmap_fin.o obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \ syscall_kern.o syscall_user.o time.o tlb.o tracer.o trap_user.o \ - uaccess_user.o sys-$(SUBARCH)/ + uaccess.o uaccess_user.o sys-$(SUBARCH)/ obj-$(CONFIG_PT_PROXY) += gdb_kern.o ptproxy/ --- linux-2.6.2-rc1/arch/um/kernel/tt/mem_user.c 2003-06-14 12:18:05.000000000 -0700 +++ 25/arch/um/kernel/tt/mem_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -25,14 +25,13 @@ void remap_data(void *segment_start, voi size = (unsigned long) segment_end - (unsigned long) segment_start; data = create_mem_file(size); - if((addr = mmap(NULL, size, PROT_WRITE | PROT_READ, - MAP_SHARED, data, 0)) == MAP_FAILED){ + addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, data, 0); + if(addr == MAP_FAILED){ perror("mapping new data segment"); exit(1); } memcpy(addr, segment_start, size); - if(switcheroo(data, prot, addr, segment_start, - size) < 0){ + if(switcheroo(data, prot, addr, segment_start, size) < 0){ printf("switcheroo failed\n"); exit(1); } --- linux-2.6.2-rc1/arch/um/kernel/tt/process_kern.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/um/kernel/tt/process_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -62,7 +62,7 @@ void *switch_to_tt(void *prev, void *nex reading = 0; err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c)); if(err != sizeof(c)) - panic("write of switch_pipe failed, errno = %d", -err); + panic("write of switch_pipe failed, err = %d", -err); reading = 1; if((from->state == TASK_ZOMBIE) || (from->state == TASK_DEAD)) @@ -104,48 +104,72 @@ void *switch_to_tt(void *prev, void *nex void release_thread_tt(struct task_struct *task) { - os_kill_process(task->thread.mode.tt.extern_pid, 0); + int pid = task->thread.mode.tt.extern_pid; + + if(os_getpid() != pid) + os_kill_process(pid, 0); } void exit_thread_tt(void) { - close(current->thread.mode.tt.switch_pipe[0]); - close(current->thread.mode.tt.switch_pipe[1]); + os_close_file(current->thread.mode.tt.switch_pipe[0]); + os_close_file(current->thread.mode.tt.switch_pipe[1]); } void schedule_tail(task_t *prev); static void new_thread_handler(int sig) { + unsigned long disable; int (*fn)(void *); void *arg; fn = current->thread.request.u.thread.proc; arg = current->thread.request.u.thread.arg; + UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1); + disable = (1 << (SIGVTALRM - 1)) | (1 << (SIGALRM - 1)) | + (1 << (SIGIO - 1)) | (1 << (SIGPROF - 1)); + SC_SIGMASK(UPT_SC(¤t->thread.regs.regs)) &= ~disable; + suspend_new_thread(current->thread.mode.tt.switch_pipe[0]); - block_signals(); + force_flush_all(); + if(current->thread.prev_sched != NULL) + schedule_tail(current->thread.prev_sched); + current->thread.prev_sched = NULL; + init_new_thread_signals(1); -#ifdef CONFIG_SMP - schedule_tail(current->thread.prev_sched); -#endif enable_timer(); free_page(current->thread.temp_stack); set_cmdline("(kernel thread)"); - force_flush_all(); - current->thread.prev_sched = NULL; change_sig(SIGUSR1, 1); change_sig(SIGVTALRM, 1); change_sig(SIGPROF, 1); - unblock_signals(); + local_irq_enable(); if(!run_kernel_thread(fn, arg, ¤t->thread.exec_buf)) do_exit(0); } static int new_thread_proc(void *stack) { + /* local_irq_disable is needed to block out signals until this thread is + * properly scheduled. Otherwise, the tracing thread will get mighty + * upset about any signals that arrive before that. + * This has the complication that it sets the saved signal mask in + * the sigcontext to block signals. This gets restored when this + * thread (or a descendant, since they get a copy of this sigcontext) + * returns to userspace. + * So, this is compensated for elsewhere. + * XXX There is still a small window until local_irq_disable() actually + * finishes where signals are possible - shouldn't be a problem in + * practice since SIGIO hasn't been forwarded here yet, and the + * local_irq_disable should finish before a SIGVTALRM has time to be + * delivered. + */ + + local_irq_disable(); init_new_thread_stack(stack, new_thread_handler); os_usr1_process(os_getpid()); return(0); @@ -156,7 +180,7 @@ static int new_thread_proc(void *stack) * itself with a SIGUSR1. set_user_mode has to be run with SIGUSR1 off, * so it is blocked before it's called. They are re-enabled on sigreturn * despite the fact that they were blocked when the SIGUSR1 was issued because - * copy_thread copies the parent's signcontext, including the signal mask + * copy_thread copies the parent's sigcontext, including the signal mask * onto the signal frame. */ @@ -165,35 +189,32 @@ void finish_fork_handler(int sig) UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1); suspend_new_thread(current->thread.mode.tt.switch_pipe[0]); -#ifdef CONFIG_SMP - schedule_tail(NULL); -#endif + force_flush_all(); + if(current->thread.prev_sched != NULL) + schedule_tail(current->thread.prev_sched); + current->thread.prev_sched = NULL; + enable_timer(); change_sig(SIGVTALRM, 1); local_irq_enable(); - force_flush_all(); if(current->mm != current->parent->mm) protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 1, 0, 1); - task_protections((unsigned long) current->thread_info); - - current->thread.prev_sched = NULL; + task_protections((unsigned long) current_thread); free_page(current->thread.temp_stack); + local_irq_disable(); change_sig(SIGUSR1, 0); set_user_mode(current); } -static int sigusr1 = SIGUSR1; - int fork_tramp(void *stack) { - int sig = sigusr1; - local_irq_disable(); + arch_init_thread(); init_new_thread_stack(stack, finish_fork_handler); - kill(os_getpid(), sig); + os_usr1_process(os_getpid()); return(0); } @@ -213,8 +234,8 @@ int copy_thread_tt(int nr, unsigned long } err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1); - if(err){ - printk("copy_thread : pipe failed, errno = %d\n", -err); + if(err < 0){ + printk("copy_thread : pipe failed, err = %d\n", -err); return(err); } @@ -377,8 +398,8 @@ static void mprotect_kernel_mem(int w) pages = (1 << CONFIG_KERNEL_STACK_ORDER); - start = (unsigned long) current->thread_info + PAGE_SIZE; - end = (unsigned long) current + PAGE_SIZE * pages; + start = (unsigned long) current_thread + PAGE_SIZE; + end = (unsigned long) current_thread + PAGE_SIZE * pages; protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1); protect_memory(end, high_physmem - end, 1, w, 1, 1); @@ -454,8 +475,9 @@ void set_init_pid(int pid) init_task.thread.mode.tt.extern_pid = pid; err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1); - if(err) panic("Can't create switch pipe for init_task, errno = %d", - err); + if(err) + panic("Can't create switch pipe for init_task, errno = %d", + -err); } int singlestepping_tt(void *t) --- linux-2.6.2-rc1/arch/um/kernel/tt/ptproxy/proxy.c 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/um/kernel/tt/ptproxy/proxy.c 2004-01-21 23:30:29.000000000 -0800 @@ -15,7 +15,6 @@ Jeff Dike (jdike@karaya.com) : Modified #include #include #include -#include #include #include #include @@ -293,10 +292,10 @@ void fake_child_exit(void) } char gdb_init_string[] = -"att 1 -b panic -b stop -handle SIGWINCH nostop noprint pass +"att 1 \n\ +b panic \n\ +b stop \n\ +handle SIGWINCH nostop noprint pass \n\ "; int start_debugger(char *prog, int startup, int stop, int *fd_out) @@ -304,7 +303,8 @@ int start_debugger(char *prog, int start int slave, child; slave = open_gdb_chan(); - if((child = fork()) == 0){ + child = fork(); + if(child == 0){ char *tempname = NULL; int fd; @@ -327,18 +327,19 @@ int start_debugger(char *prog, int start exit(1); #endif } - if((fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0)) < 0){ - printk("start_debugger : make_tempfile failed, errno = %d\n", - errno); + fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0); + if(fd < 0){ + printk("start_debugger : make_tempfile failed," + "err = %d\n", -fd); exit(1); } - write(fd, gdb_init_string, sizeof(gdb_init_string) - 1); + os_write_file(fd, gdb_init_string, sizeof(gdb_init_string) - 1); if(startup){ if(stop){ - write(fd, "b start_kernel\n", + os_write_file(fd, "b start_kernel\n", strlen("b start_kernel\n")); } - write(fd, "c\n", strlen("c\n")); + os_write_file(fd, "c\n", strlen("c\n")); } if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ printk("start_debugger : PTRACE_TRACEME failed, " --- linux-2.6.2-rc1/arch/um/kernel/tt/ptproxy/sysdep.c 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/um/kernel/tt/ptproxy/sysdep.c 2004-01-21 23:30:29.000000000 -0800 @@ -9,6 +9,7 @@ terms and conditions. #include #include #include +#include #include #include #include --- linux-2.6.2-rc1/arch/um/kernel/tt/ptproxy/wait.c 2003-06-14 12:18:32.000000000 -0700 +++ 25/arch/um/kernel/tt/ptproxy/wait.c 2004-01-21 23:30:29.000000000 -0800 @@ -56,21 +56,23 @@ int parent_wait_return(struct debugger * int real_wait_return(struct debugger *debugger) { unsigned long ip; - int err, pid; + int pid; pid = debugger->pid; + ip = ptrace(PTRACE_PEEKUSER, pid, PT_IP_OFFSET, 0); ip = IP_RESTART_SYSCALL(ip); - err = ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip); + if(ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip) < 0) tracer_panic("real_wait_return : Failed to restart system " - "call, errno = %d\n"); + "call, errno = %d\n", errno); + if((ptrace(PTRACE_SYSCALL, debugger->pid, 0, SIGCHLD) < 0) || (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || debugger_normal_return(debugger, -1)) tracer_panic("real_wait_return : gdb failed to wait, " - "errno = %d\n"); + "errno = %d\n", errno); return(0); } --- linux-2.6.2-rc1/arch/um/kernel/tt/syscall_kern.c 2003-06-14 12:17:56.000000000 -0700 +++ 25/arch/um/kernel/tt/syscall_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ --- linux-2.6.2-rc1/arch/um/kernel/tt/tracer.c 2003-06-14 12:17:56.000000000 -0700 +++ 25/arch/um/kernel/tt/tracer.c 2004-01-21 23:30:29.000000000 -0800 @@ -39,16 +39,17 @@ int is_tracer_winch(int pid, int fd, voi return(0); register_winch_irq(tracer_winch[0], fd, -1, data); - return(0); + return(1); } static void tracer_winch_handler(int sig) { + int n; char c = 1; - if(write(tracer_winch[1], &c, sizeof(c)) != sizeof(c)) - printk("tracer_winch_handler - write failed, errno = %d\n", - errno); + n = os_write_file(tracer_winch[1], &c, sizeof(c)); + if(n != sizeof(c)) + printk("tracer_winch_handler - write failed, err = %d\n", -n); } /* Called only by the tracing thread during initialization */ @@ -58,9 +59,8 @@ static void setup_tracer_winch(void) int err; err = os_pipe(tracer_winch, 1, 1); - if(err){ - printk("setup_tracer_winch : os_pipe failed, errno = %d\n", - -err); + if(err < 0){ + printk("setup_tracer_winch : os_pipe failed, err = %d\n", -err); return; } signal(SIGWINCH, tracer_winch_handler); @@ -130,8 +130,8 @@ static void sleeping_process_signal(int case SIGTSTP: if(ptrace(PTRACE_CONT, pid, 0, sig) < 0) tracer_panic("sleeping_process_signal : Failed to " - "continue pid %d, errno = %d\n", pid, - sig); + "continue pid %d, signal = %d, " + "errno = %d\n", pid, sig, errno); break; /* This happens when the debugger (e.g. strace) is doing system call @@ -145,7 +145,7 @@ static void sleeping_process_signal(int if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) tracer_panic("sleeping_process_signal : Failed to " "PTRACE_SYSCALL pid %d, errno = %d\n", - pid, sig); + pid, errno); break; case SIGSTOP: break; @@ -218,7 +218,7 @@ int tracer(int (*init_proc)(void *), voi err = attach(debugger_parent); if(err){ printf("Failed to attach debugger parent %d, " - "errno = %d\n", debugger_parent, err); + "errno = %d\n", debugger_parent, -err); debugger_parent = -1; } else { @@ -233,7 +233,8 @@ int tracer(int (*init_proc)(void *), voi } set_cmdline("(tracing thread)"); while(1){ - if((pid = waitpid(-1, &status, WUNTRACED)) <= 0){ + pid = waitpid(-1, &status, WUNTRACED); + if(pid <= 0){ if(errno != ECHILD){ printf("wait failed - errno = %d\n", errno); } @@ -401,7 +402,7 @@ static int __init uml_debug_setup(char * if(!strcmp(line, "go")) debug_stop = 0; else if(!strcmp(line, "parent")) debug_parent = 1; - else printk("Unknown debug option : '%s'\n", line); + else printf("Unknown debug option : '%s'\n", line); line = next; } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/kernel/tt/uaccess.c 2004-01-21 23:30:29.000000000 -0800 @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/sched.h" +#include "asm/uaccess.h" + +int copy_from_user_tt(void *to, const void *from, int n) +{ + if(!access_ok_tt(VERIFY_READ, from, n)) + return(n); + + return(__do_copy_from_user(to, from, n, ¤t->thread.fault_addr, + ¤t->thread.fault_catcher)); +} + +int copy_to_user_tt(void *to, const void *from, int n) +{ + if(!access_ok_tt(VERIFY_WRITE, to, n)) + return(n); + + return(__do_copy_to_user(to, from, n, ¤t->thread.fault_addr, + ¤t->thread.fault_catcher)); +} + +int strncpy_from_user_tt(char *dst, const char *src, int count) +{ + int n; + + if(!access_ok_tt(VERIFY_READ, src, 1)) + return(-EFAULT); + + n = __do_strncpy_from_user(dst, src, count, + ¤t->thread.fault_addr, + ¤t->thread.fault_catcher); + if(n < 0) return(-EFAULT); + return(n); +} + +int __clear_user_tt(void *mem, int len) +{ + return(__do_clear_user(mem, len, + ¤t->thread.fault_addr, + ¤t->thread.fault_catcher)); +} + +int clear_user_tt(void *mem, int len) +{ + if(!access_ok_tt(VERIFY_WRITE, mem, len)) + return(len); + + return(__do_clear_user(mem, len, ¤t->thread.fault_addr, + ¤t->thread.fault_catcher)); +} + +int strnlen_user_tt(const void *str, int len) +{ + return(__do_strnlen_user(str, len, + ¤t->thread.fault_addr, + ¤t->thread.fault_catcher)); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.2-rc1/arch/um/kernel/tt/uaccess_user.c 2003-06-14 12:18:06.000000000 -0700 +++ 25/arch/um/kernel/tt/uaccess_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -8,15 +8,20 @@ #include #include "user_util.h" #include "uml_uaccess.h" +#include "task.h" +#include "kern_util.h" int __do_copy_from_user(void *to, const void *from, int n, void **fault_addr, void **fault_catcher) { + struct tt_regs save = TASK_REGS(get_current())->tt; unsigned long fault; int faulted; fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, __do_copy, &faulted); + TASK_REGS(get_current())->tt = save; + if(!faulted) return(0); else return(n - (fault - (unsigned long) from)); } @@ -29,11 +34,14 @@ static void __do_strncpy(void *dst, cons int __do_strncpy_from_user(char *dst, const char *src, unsigned long count, void **fault_addr, void **fault_catcher) { + struct tt_regs save = TASK_REGS(get_current())->tt; unsigned long fault; int faulted; fault = __do_user_copy(dst, src, count, fault_addr, fault_catcher, __do_strncpy, &faulted); + TASK_REGS(get_current())->tt = save; + if(!faulted) return(strlen(dst)); else return(-1); } @@ -46,11 +54,14 @@ static void __do_clear(void *to, const v int __do_clear_user(void *mem, unsigned long len, void **fault_addr, void **fault_catcher) { + struct tt_regs save = TASK_REGS(get_current())->tt; unsigned long fault; int faulted; fault = __do_user_copy(mem, NULL, len, fault_addr, fault_catcher, __do_clear, &faulted); + TASK_REGS(get_current())->tt = save; + if(!faulted) return(0); else return(len - (fault - (unsigned long) mem)); } @@ -58,19 +69,20 @@ int __do_clear_user(void *mem, unsigned int __do_strnlen_user(const char *str, unsigned long n, void **fault_addr, void **fault_catcher) { + struct tt_regs save = TASK_REGS(get_current())->tt; int ret; unsigned long *faddrp = (unsigned long *)fault_addr; jmp_buf jbuf; *fault_catcher = &jbuf; - if(setjmp(jbuf) == 0){ + if(sigsetjmp(jbuf, 1) == 0) ret = strlen(str) + 1; - } - else { - ret = *faddrp - (unsigned long) str; - } + else ret = *faddrp - (unsigned long) str; + *fault_addr = NULL; *fault_catcher = NULL; + + TASK_REGS(get_current())->tt = save; return ret; } --- linux-2.6.2-rc1/arch/um/kernel/tt/unmap.c 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/um/kernel/tt/unmap.c 2004-01-21 23:30:29.000000000 -0800 @@ -3,10 +3,7 @@ * Licensed under the GPL */ -#include -#include #include -#include "user.h" int switcheroo(int fd, int prot, void *from, void *to, int size) { --- linux-2.6.2-rc1/arch/um/kernel/tty_log.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/um/kernel/tty_log.c 2004-01-21 23:30:29.000000000 -0800 @@ -9,10 +9,10 @@ #include #include #include -#include #include #include "init.h" #include "user.h" +#include "kern_util.h" #include "os.h" #define TTY_LOG_DIR "./" @@ -24,29 +24,40 @@ static int tty_log_fd = -1; #define TTY_LOG_OPEN 1 #define TTY_LOG_CLOSE 2 #define TTY_LOG_WRITE 3 +#define TTY_LOG_EXEC 4 + +#define TTY_READ 1 +#define TTY_WRITE 2 struct tty_log_buf { int what; unsigned long tty; int len; + int direction; + unsigned long sec; + unsigned long usec; }; -int open_tty_log(void *tty) +int open_tty_log(void *tty, void *current_tty) { struct timeval tv; struct tty_log_buf data; char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")]; int fd; + gettimeofday(&tv, NULL); if(tty_log_fd != -1){ - data = ((struct tty_log_buf) { what : TTY_LOG_OPEN, - tty : (unsigned long) tty, - len : 0 }); - write(tty_log_fd, &data, sizeof(data)); + data = ((struct tty_log_buf) { .what = TTY_LOG_OPEN, + .tty = (unsigned long) tty, + .len = sizeof(current_tty), + .direction = 0, + .sec = tv.tv_sec, + .usec = tv.tv_usec } ); + os_write_file(tty_log_fd, &data, sizeof(data)); + os_write_file(tty_log_fd, ¤t_tty, data.len); return(tty_log_fd); } - gettimeofday(&tv, NULL); sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec, (unsigned int) tv.tv_usec); @@ -62,30 +73,117 @@ int open_tty_log(void *tty) void close_tty_log(int fd, void *tty) { struct tty_log_buf data; + struct timeval tv; if(tty_log_fd != -1){ - data = ((struct tty_log_buf) { what : TTY_LOG_CLOSE, - tty : (unsigned long) tty, - len : 0 }); - write(tty_log_fd, &data, sizeof(data)); + gettimeofday(&tv, NULL); + data = ((struct tty_log_buf) { .what = TTY_LOG_CLOSE, + .tty = (unsigned long) tty, + .len = 0, + .direction = 0, + .sec = tv.tv_sec, + .usec = tv.tv_usec } ); + os_write_file(tty_log_fd, &data, sizeof(data)); return; } - close(fd); + os_close_file(fd); } -int write_tty_log(int fd, char *buf, int len, void *tty) +static int log_chunk(int fd, const char *buf, int len) { + int total = 0, try, missed, n; + char chunk[64]; + + while(len > 0){ + try = (len > sizeof(chunk)) ? sizeof(chunk) : len; + missed = copy_from_user_proc(chunk, (char *) buf, try); + try -= missed; + n = os_write_file(fd, chunk, try); + if(n != try) { + if(n < 0) + return(n); + return(-EIO); + } + if(missed != 0) + return(-EFAULT); + + len -= try; + total += try; + buf += try; + } + + return(total); +} + +int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read) +{ + struct timeval tv; struct tty_log_buf data; + int direction; if(fd == tty_log_fd){ - data = ((struct tty_log_buf) { what : TTY_LOG_WRITE, - tty : (unsigned long) tty, - len : len }); - write(tty_log_fd, &data, sizeof(data)); + gettimeofday(&tv, NULL); + direction = is_read ? TTY_READ : TTY_WRITE; + data = ((struct tty_log_buf) { .what = TTY_LOG_WRITE, + .tty = (unsigned long) tty, + .len = len, + .direction = direction, + .sec = tv.tv_sec, + .usec = tv.tv_usec } ); + os_write_file(tty_log_fd, &data, sizeof(data)); } - return(write(fd, buf, len)); + + return(log_chunk(fd, buf, len)); } +void log_exec(char **argv, void *tty) +{ + struct timeval tv; + struct tty_log_buf data; + char **ptr,*arg; + int len; + + if(tty_log_fd == -1) return; + + gettimeofday(&tv, NULL); + + len = 0; + for(ptr = argv; ; ptr++){ + if(copy_from_user_proc(&arg, ptr, sizeof(arg))) + return; + if(arg == NULL) break; + len += strlen_user_proc(arg); + } + + data = ((struct tty_log_buf) { .what = TTY_LOG_EXEC, + .tty = (unsigned long) tty, + .len = len, + .direction = 0, + .sec = tv.tv_sec, + .usec = tv.tv_usec } ); + os_write_file(tty_log_fd, &data, sizeof(data)); + + for(ptr = argv; ; ptr++){ + if(copy_from_user_proc(&arg, ptr, sizeof(arg))) + return; + if(arg == NULL) break; + log_chunk(tty_log_fd, arg, strlen_user_proc(arg)); + } +} + +extern void register_tty_logger(int (*opener)(void *, void *), + int (*writer)(int, const char *, int, + void *, int), + void (*closer)(int, void *)); + +static int register_logger(void) +{ + register_tty_logger(open_tty_log, write_tty_log, close_tty_log); + return(0); +} + +__uml_initcall(register_logger); + static int __init set_tty_log_dir(char *name, int *add) { tty_log_dir = name; @@ -104,7 +202,7 @@ static int __init set_tty_log_fd(char *n tty_log_fd = strtoul(name, &end, 0); if((*end != '\0') || (end == name)){ - printk("set_tty_log_fd - strtoul failed on '%s'\n", name); + printf("set_tty_log_fd - strtoul failed on '%s'\n", name); tty_log_fd = -1; } return 0; --- linux-2.6.2-rc1/arch/um/kernel/uaccess_user.c 2003-06-14 12:18:28.000000000 -0700 +++ 25/arch/um/kernel/uaccess_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -20,7 +20,7 @@ unsigned long __do_user_copy(void *to, c jmp_buf jbuf; *fault_catcher = &jbuf; - if(setjmp(jbuf) == 0){ + if(sigsetjmp(jbuf, 1) == 0){ (*op)(to, from, n); ret = 0; *faulted_out = 0; --- linux-2.6.2-rc1/arch/um/kernel/um_arch.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/um/kernel/um_arch.c 2004-01-21 23:30:29.000000000 -0800 @@ -38,13 +38,18 @@ #include "mode_kern.h" #include "mode.h" -#define DEFAULT_COMMAND_LINE "root=6200" +#define DEFAULT_COMMAND_LINE "root=98:0" struct cpuinfo_um boot_cpu_data = { .loops_per_jiffy = 0, .ipi_pipe = { -1, -1 } }; +/* Placeholder to make UML link until the vsyscall stuff is actually + * implemented + */ +void *__kernel_vsyscall; + unsigned long thread_saved_pc(struct task_struct *task) { return(os_process_pc(CHOOSE_MODE_PROC(thread_pid_tt, thread_pid_skas, @@ -53,18 +58,22 @@ unsigned long thread_saved_pc(struct tas static int show_cpuinfo(struct seq_file *m, void *v) { - int index; + int index = 0; - index = (struct cpuinfo_um *)v - cpu_data; #ifdef CONFIG_SMP + index = (struct cpuinfo_um *) v - cpu_data; if (!cpu_online(index)) return 0; #endif - seq_printf(m, "bogomips\t: %lu.%02lu\n", + seq_printf(m, "processor\t: %d\n", index); + seq_printf(m, "vendor_id\t: User Mode Linux\n"); + seq_printf(m, "model name\t: UML\n"); + seq_printf(m, "mode\t\t: %s\n", CHOOSE_MODE("tt", "skas")); + seq_printf(m, "host\t\t: %s\n", host_info); + seq_printf(m, "bogomips\t: %lu.%02lu\n\n", loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); - seq_printf(m, "host\t\t: %s\n", host_info); return(0); } @@ -134,12 +143,12 @@ void set_cmdline(char *cmd) if(umid != NULL){ snprintf(argv1_begin, (argv1_end - argv1_begin) * sizeof(*ptr), - "(%s)", umid); + "(%s) ", umid); ptr = &argv1_begin[strlen(argv1_begin)]; } else ptr = argv1_begin; - snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), " [%s]", cmd); + snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), "[%s]", cmd); memset(argv1_begin + strlen(argv1_begin), '\0', argv1_end - argv1_begin - strlen(argv1_begin)); #endif @@ -179,7 +188,7 @@ __uml_setup("root=", uml_root_setup, static int __init uml_ncpus_setup(char *line, int *add) { if (!sscanf(line, "%d", &ncpus)) { - printk("Couldn't parse [%s]\n", line); + printf("Couldn't parse [%s]\n", line); return -1; } @@ -210,7 +219,7 @@ static int __init mode_tt_setup(char *li static int __init mode_tt_setup(char *line, int *add) { - printk("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n"); + printf("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n"); return(0); } @@ -221,7 +230,7 @@ static int __init mode_tt_setup(char *li static int __init mode_tt_setup(char *line, int *add) { - printk("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n"); + printf("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n"); return(0); } @@ -291,7 +300,7 @@ static void __init uml_postsetup(void) /* Set during early boot */ unsigned long brk_start; -static struct vm_reserved kernel_vm_reserved; +unsigned long end_iomem; #define MIN_VMALLOC (32 * 1024 * 1024) @@ -299,7 +308,7 @@ int linux_main(int argc, char **argv) { unsigned long avail; unsigned long virtmem_size, max_physmem; - unsigned int i, add, err; + unsigned int i, add; for (i = 1; i < argc; i++){ if((i == 1) && (argv[i][0] == ' ')) continue; @@ -328,12 +337,16 @@ int linux_main(int argc, char **argv) argv1_end = &argv[1][strlen(argv[1])]; #endif - set_usable_vm(uml_physmem, get_kmem_end()); - highmem = 0; - max_physmem = get_kmem_end() - uml_physmem - MIN_VMALLOC; - if(physmem_size > max_physmem){ - highmem = physmem_size - max_physmem; + iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; + max_physmem = get_kmem_end() - uml_physmem - iomem_size - MIN_VMALLOC; + + /* Zones have to begin on a 1 << MAX_ORDER page boundary, + * so this makes sure that's true for highmem + */ + max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1); + if(physmem_size + iomem_size > max_physmem){ + highmem = physmem_size + iomem_size - max_physmem; physmem_size -= highmem; #ifndef CONFIG_HIGHMEM highmem = 0; @@ -343,11 +356,19 @@ int linux_main(int argc, char **argv) } high_physmem = uml_physmem + physmem_size; - high_memory = (void *) high_physmem; + end_iomem = high_physmem + iomem_size; + high_memory = (void *) end_iomem; start_vm = VMALLOC_START; - setup_physmem(uml_physmem, uml_reserved, physmem_size); + setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); + if(init_maps(physmem_size, iomem_size, highmem)){ + printf("Failed to allocate mem_map for %ld bytes of physical " + "memory and %ld bytes of highmem\n", physmem_size, + highmem); + exit(1); + } + virtmem_size = physmem_size; avail = get_kmem_end() - start_vm; if(physmem_size > avail) virtmem_size = avail; @@ -357,18 +378,13 @@ int linux_main(int argc, char **argv) printf("Kernel virtual memory size shrunk to %ld bytes\n", virtmem_size); - err = reserve_vm(high_physmem, end_vm, &kernel_vm_reserved); - if(err){ - printf("Failed to reserve VM area for kernel VM\n"); - exit(1); - } - uml_postsetup(); init_task.thread.kernel_stack = (unsigned long) &init_thread_info + 2 * PAGE_SIZE; task_protections((unsigned long) &init_thread_info); + os_flush_stdout(); return(CHOOSE_MODE(start_uml_tt(), start_uml_skas())); } @@ -403,6 +419,7 @@ void __init check_bugs(void) arch_check_bugs(); check_ptrace(); check_sigio(); + check_devanon(); } /* --- linux-2.6.2-rc1/arch/um/kernel/umid.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/arch/um/kernel/umid.c 2004-01-21 23:30:29.000000000 -0800 @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -33,18 +32,19 @@ static char *uml_dir = UML_DIR; static int umid_is_random = 1; static int umid_inited = 0; -static int make_umid(void); +static int make_umid(int (*printer)(const char *fmt, ...)); -static int __init set_umid(char *name, int is_random) +static int __init set_umid(char *name, int is_random, + int (*printer)(const char *fmt, ...)) { if(umid_inited){ - printk("Unique machine name can't be set twice\n"); + (*printer)("Unique machine name can't be set twice\n"); return(-1); } if(strlen(name) > UMID_LEN - 1) - printk("Unique machine name is being truncated to %s " - "characters\n", UMID_LEN); + (*printer)("Unique machine name is being truncated to %s " + "characters\n", UMID_LEN); strlcpy(umid, name, sizeof(umid)); umid_is_random = is_random; @@ -54,7 +54,7 @@ static int __init set_umid(char *name, i static int __init set_umid_arg(char *name, int *add) { - return(set_umid(name, 0)); + return(set_umid(name, 0, printf)); } __uml_setup("umid=", set_umid_arg, @@ -67,7 +67,7 @@ int __init umid_file_name(char *name, ch { int n; - if(!umid_inited && make_umid()) return(-1); + if(!umid_inited && make_umid(printk)) return(-1); n = strlen(uml_dir) + strlen(umid) + strlen(name) + 1; if(n > len){ @@ -85,22 +85,23 @@ static int __init create_pid_file(void) { char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; char pid[sizeof("nnnnn\0")]; - int fd; + int fd, n; if(umid_file_name("pid", file, sizeof(file))) return 0; fd = os_open_file(file, of_create(of_excl(of_rdwr(OPENFLAGS()))), 0644); if(fd < 0){ - printk("Open of machine pid file \"%s\" failed - " - "errno = %d\n", file, -fd); + printf("Open of machine pid file \"%s\" failed - " + "err = %d\n", file, -fd); return 0; } sprintf(pid, "%d\n", os_getpid()); - if(write(fd, pid, strlen(pid)) != strlen(pid)) - printk("Write of pid file failed - errno = %d\n", errno); - close(fd); + n = os_write_file(fd, pid, strlen(pid)); + if(n != strlen(pid)) + printf("Write of pid file failed - err = %d\n", -n); + os_close_file(fd); return 0; } @@ -111,7 +112,8 @@ static int actually_do_remove(char *dir) int len; char file[256]; - if((directory = opendir(dir)) == NULL){ + directory = opendir(dir); + if(directory == NULL){ printk("actually_do_remove : couldn't open directory '%s', " "errno = %d\n", dir, errno); return(1); @@ -160,22 +162,24 @@ int not_dead_yet(char *dir) { char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; char pid[sizeof("nnnnn\0")], *end; - int dead, fd, p; + int dead, fd, p, n; sprintf(file, "%s/pid", dir); dead = 0; - if((fd = os_open_file(file, of_read(OPENFLAGS()), 0)) < 0){ + fd = os_open_file(file, of_read(OPENFLAGS()), 0); + if(fd < 0){ if(fd != -ENOENT){ printk("not_dead_yet : couldn't open pid file '%s', " - "errno = %d\n", file, -fd); + "err = %d\n", file, -fd); return(1); } dead = 1; } if(fd > 0){ - if(read(fd, pid, sizeof(pid)) < 0){ + n = os_read_file(fd, pid, sizeof(pid)); + if(n < 0){ printk("not_dead_yet : couldn't read pid file '%s', " - "errno = %d\n", file, errno); + "err = %d\n", file, -n); return(1); } p = strtoul(pid, &end, 0); @@ -197,7 +201,7 @@ static int __init set_uml_dir(char *name if((strlen(name) > 0) && (name[strlen(name) - 1] != '/')){ uml_dir = malloc(strlen(name) + 1); if(uml_dir == NULL){ - printk("Failed to malloc uml_dir - error = %d\n", + printf("Failed to malloc uml_dir - error = %d\n", errno); uml_dir = name; return(0); @@ -217,7 +221,7 @@ static int __init make_uml_dir(void) char *home = getenv("HOME"); if(home == NULL){ - printk("make_uml_dir : no value in environment for " + printf("make_uml_dir : no value in environment for " "$HOME\n"); exit(1); } @@ -232,57 +236,59 @@ static int __init make_uml_dir(void) dir[len + 1] = '\0'; } - if((uml_dir = malloc(strlen(dir) + 1)) == NULL){ + uml_dir = malloc(strlen(dir) + 1); + if(uml_dir == NULL){ printf("make_uml_dir : malloc failed, errno = %d\n", errno); exit(1); } strcpy(uml_dir, dir); if((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)){ - printk("Failed to mkdir %s - errno = %i\n", uml_dir, errno); + printf("Failed to mkdir %s - errno = %i\n", uml_dir, errno); return(-1); } return 0; } -static int __init make_umid(void) +static int __init make_umid(int (*printer)(const char *fmt, ...)) { int fd, err; char tmp[strlen(uml_dir) + UMID_LEN + 1]; strlcpy(tmp, uml_dir, sizeof(tmp)); - if(*umid == 0){ + if(!umid_inited){ strcat(tmp, "XXXXXX"); fd = mkstemp(tmp); if(fd < 0){ - printk("make_umid - mkstemp failed, errno = %d\n", - errno); + (*printer)("make_umid - mkstemp failed, errno = %d\n", + errno); return(1); } - close(fd); + os_close_file(fd); /* There's a nice tiny little race between this unlink and * the mkdir below. It'd be nice if there were a mkstemp * for directories. */ unlink(tmp); - set_umid(&tmp[strlen(uml_dir)], 1); + set_umid(&tmp[strlen(uml_dir)], 1, printer); } sprintf(tmp, "%s%s", uml_dir, umid); - if((err = mkdir(tmp, 0777)) < 0){ + err = mkdir(tmp, 0777); + if(err < 0){ if(errno == EEXIST){ if(not_dead_yet(tmp)){ - printk("umid '%s' is in use\n", umid); + (*printer)("umid '%s' is in use\n", umid); return(-1); } err = mkdir(tmp, 0777); } } if(err < 0){ - printk("Failed to create %s - errno = %d\n", umid, errno); + (*printer)("Failed to create %s - errno = %d\n", umid, errno); return(-1); } @@ -295,7 +301,13 @@ __uml_setup("uml_dir=", set_uml_dir, ); __uml_postsetup(make_uml_dir); -__uml_postsetup(make_umid); + +static int __init make_umid_setup(void) +{ + return(make_umid(printf)); +} + +__uml_postsetup(make_umid_setup); __uml_postsetup(create_pid_file); /* --- linux-2.6.2-rc1/arch/um/kernel/user_syms.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/um/kernel/user_syms.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,7 +1,7 @@ #include #include -#include #include +#include #include #include #include @@ -27,7 +27,7 @@ struct module_symbol #define __MODULE_STRING_1(x) #x #define __MODULE_STRING(x) __MODULE_STRING_1(x) -#if !defined(__AUTOCONF_INCLUDED__) +#if !defined(AUTOCONF_INCLUDED) #define __EXPORT_SYMBOL(sym,str) error config_must_be_included_before_module #define EXPORT_SYMBOL(var) error config_must_be_included_before_module --- linux-2.6.2-rc1/arch/um/kernel/user_util.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/kernel/user_util.c 2004-01-21 23:30:29.000000000 -0800 @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -82,7 +81,8 @@ int wait_for_stop(int pid, int sig, int int status, ret; while(1){ - if(((ret = waitpid(pid, &status, WUNTRACED)) < 0) || + ret = waitpid(pid, &status, WUNTRACED); + if((ret < 0) || !WIFSTOPPED(status) || (WSTOPSIG(status) != sig)){ if(ret < 0){ if(errno == EINTR) continue; @@ -119,17 +119,6 @@ int wait_for_stop(int pid, int sig, int } } -int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags) -{ - int pid; - - pid = clone(fn, sp, flags, arg); - if(pid < 0) return(-1); - wait_for_stop(pid, SIGSTOP, PTRACE_CONT, NULL); - ptrace(PTRACE_CONT, pid, 0, 0); - return(pid); -} - int raw(int fd, int complain) { struct termios tt; --- linux-2.6.2-rc1/arch/um/main.c 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/um/main.c 2004-01-21 23:30:29.000000000 -0800 @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -123,12 +124,14 @@ int main(int argc, char **argv, char **e set_stklim(); - if((new_argv = malloc((argc + 1) * sizeof(char *))) == NULL){ + new_argv = malloc((argc + 1) * sizeof(char *)); + if(new_argv == NULL){ perror("Mallocing argv"); exit(1); } for(i=0;i $@ +filechk_$(ARCH_DIR)/include/task.h := $(ARCH_DIR)/util/mk_task + $(ARCH_DIR)/include/task.h : $(ARCH_DIR)/util/mk_task - $< > $@ + $(call filechk,$@) + +filechk_$(ARCH_DIR)/include/kern_constants.h := $(ARCH_DIR)/util/mk_constants $(ARCH_DIR)/include/kern_constants.h : $(ARCH_DIR)/util/mk_constants - $< > $@ + $(call filechk,$@) -$(ARCH_DIR)/util/mk_task : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h \ - $(ARCH_DIR)/util FORCE ; +$(ARCH_DIR)/util/mk_task $(ARCH_DIR)/util/mk_constants : $(ARCH_DIR)/util \ + sys_prepare FORCE ; $(ARCH_DIR)/util: FORCE - @$(call descend,$@,) + $(MAKE) -f scripts/Makefile.build obj=$@ -export SUBARCH USER_CFLAGS OS +export SUBARCH USER_CFLAGS OS --- linux-2.6.2-rc1/arch/um/Makefile-i386 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/um/Makefile-i386 2004-01-21 23:30:29.000000000 -0800 @@ -16,22 +16,28 @@ SYS_UTIL_DIR := $(ARCH_DIR)/sys-i386/uti SYS_HEADERS = $(SYS_DIR)/sc.h $(SYS_DIR)/thread.h +sys_prepare: $(SYS_DIR)/sc.h + prepare: $(SYS_HEADERS) +filechk_$(SYS_DIR)/sc.h := $(SYS_UTIL_DIR)/mk_sc + $(SYS_DIR)/sc.h: $(SYS_UTIL_DIR)/mk_sc - $< > $@ + $(call filechk,$@) + +filechk_$(SYS_DIR)/thread.h := $(SYS_UTIL_DIR)/mk_thread $(SYS_DIR)/thread.h: $(SYS_UTIL_DIR)/mk_thread - $< > $@ + $(call filechk,$@) -$(SYS_UTIL_DIR)/mk_sc: FORCE ; - @$(call descend,$(SYS_UTIL_DIR),$@) +$(SYS_UTIL_DIR)/mk_sc: scripts/fixdep include/config/MARKER FORCE ; + +@$(call descend,$(SYS_UTIL_DIR),$@) -$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) FORCE ; - @$(call descend,$(SYS_UTIL_DIR),$@) +$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) sys_prepare FORCE ; + +@$(call descend,$(SYS_UTIL_DIR),$@) $(SYS_UTIL_DIR): include/asm FORCE - @$(call descend,$@,) + +@$(call descend,$@,) sysclean : rm -f $(SYS_HEADERS) --- linux-2.6.2-rc1/arch/um/Makefile-skas 2003-06-14 12:18:09.000000000 -0700 +++ 25/arch/um/Makefile-skas 2004-01-21 23:30:29.000000000 -0800 @@ -14,7 +14,7 @@ MODE_INCLUDE += -I$(TOPDIR)/$(ARCH_DIR)/ LINK_SKAS = -Wl,-rpath,/lib LD_SCRIPT_SKAS = dyn.lds.s -GEN_HEADERS += $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h +GEN_HEADERS += $(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h -$(ARCH_DIR)/kernel/skas/include/skas_ptregs.h : - $(MAKE) -C $(ARCH_DIR)/kernel/skas include/skas_ptregs.h +$(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h : + $(call descend,$(ARCH_DIR)/kernel/skas,$@) --- linux-2.6.2-rc1/arch/um/os-Linux/drivers/ethertap_kern.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/os-Linux/drivers/ethertap_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -8,7 +8,6 @@ #include "linux/init.h" #include "linux/netdevice.h" #include "linux/etherdevice.h" -#include "linux/init.h" #include "net_kern.h" #include "net_user.h" #include "etap.h" --- linux-2.6.2-rc1/arch/um/os-Linux/drivers/ethertap_user.c 2003-06-14 12:18:21.000000000 -0700 +++ 25/arch/um/os-Linux/drivers/ethertap_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -42,13 +41,14 @@ static void etap_change(int op, unsigned { struct addr_change change; void *output; + int n; change.what = op; memcpy(change.addr, addr, sizeof(change.addr)); memcpy(change.netmask, netmask, sizeof(change.netmask)); - if(write(fd, &change, sizeof(change)) != sizeof(change)) - printk("etap_change - request failed, errno = %d\n", - errno); + n = os_write_file(fd, &change, sizeof(change)); + if(n != sizeof(change)) + printk("etap_change - request failed, err = %d\n", -n); output = um_kmalloc(page_size()); if(output == NULL) printk("etap_change : Failed to allocate output buffer\n"); @@ -82,15 +82,15 @@ static void etap_pre_exec(void *arg) struct etap_pre_exec_data *data = arg; dup2(data->control_remote, 1); - close(data->data_me); - close(data->control_me); + os_close_file(data->data_me); + os_close_file(data->control_me); } static int etap_tramp(char *dev, char *gate, int control_me, int control_remote, int data_me, int data_remote) { struct etap_pre_exec_data pe_data; - int pid, status, err; + int pid, status, err, n; char version_buf[sizeof("nnnnn\0")]; char data_fd_buf[sizeof("nnnnnn\0")]; char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; @@ -114,21 +114,21 @@ static int etap_tramp(char *dev, char *g pe_data.data_me = data_me; pid = run_helper(etap_pre_exec, &pe_data, args, NULL); - if(pid < 0) err = errno; - close(data_remote); - close(control_remote); - if(read(control_me, &c, sizeof(c)) != sizeof(c)){ - printk("etap_tramp : read of status failed, errno = %d\n", - errno); - return(EINVAL); + if(pid < 0) err = pid; + os_close_file(data_remote); + os_close_file(control_remote); + n = os_read_file(control_me, &c, sizeof(c)); + if(n != sizeof(c)){ + printk("etap_tramp : read of status failed, err = %d\n", -n); + return(-EINVAL); } if(c != 1){ printk("etap_tramp : uml_net failed\n"); - err = EINVAL; - if(waitpid(pid, &status, 0) < 0) err = errno; - else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)){ + err = -EINVAL; + if(waitpid(pid, &status, 0) < 0) + err = -errno; + else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)) printk("uml_net didn't exit with status 1\n"); - } } return(err); } @@ -143,14 +143,14 @@ static int etap_open(void *data) if(err) return(err); err = os_pipe(data_fds, 0, 0); - if(err){ - printk("data os_pipe failed - errno = %d\n", -err); + if(err < 0){ + printk("data os_pipe failed - err = %d\n", -err); return(err); } err = os_pipe(control_fds, 1, 0); - if(err){ - printk("control os_pipe failed - errno = %d\n", -err); + if(err < 0){ + printk("control os_pipe failed - err = %d\n", -err); return(err); } @@ -167,9 +167,9 @@ static int etap_open(void *data) kfree(output); } - if(err != 0){ - printk("etap_tramp failed - errno = %d\n", err); - return(-err); + if(err < 0){ + printk("etap_tramp failed - err = %d\n", -err); + return(err); } pri->data_fd = data_fds[0]; @@ -183,11 +183,11 @@ static void etap_close(int fd, void *dat struct ethertap_data *pri = data; iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); - close(fd); + os_close_file(fd); os_shutdown_socket(pri->data_fd, 1, 1); - close(pri->data_fd); + os_close_file(pri->data_fd); pri->data_fd = -1; - close(pri->control_fd); + os_close_file(pri->control_fd); pri->control_fd = -1; } --- linux-2.6.2-rc1/arch/um/os-Linux/drivers/tuntap_user.c 2003-06-14 12:18:51.000000000 -0700 +++ 25/arch/um/os-Linux/drivers/tuntap_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -61,7 +60,7 @@ static void tuntap_pre_exec(void *arg) struct tuntap_pre_exec_data *data = arg; dup2(data->stdout, 1); - close(data->close_me); + os_close_file(data->close_me); } static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, @@ -86,7 +85,7 @@ static int tuntap_open_tramp(char *gate, if(pid < 0) return(-pid); - close(remote); + os_close_file(remote); msg.msg_name = NULL; msg.msg_namelen = 0; @@ -107,19 +106,19 @@ static int tuntap_open_tramp(char *gate, if(n < 0){ printk("tuntap_open_tramp : recvmsg failed - errno = %d\n", errno); - return(errno); + return(-errno); } waitpid(pid, NULL, 0); cmsg = CMSG_FIRSTHDR(&msg); if(cmsg == NULL){ printk("tuntap_open_tramp : didn't receive a message\n"); - return(EINVAL); + return(-EINVAL); } if((cmsg->cmsg_level != SOL_SOCKET) || (cmsg->cmsg_type != SCM_RIGHTS)){ printk("tuntap_open_tramp : didn't receive a descriptor\n"); - return(EINVAL); + return(-EINVAL); } *fd_out = ((int *) CMSG_DATA(cmsg))[0]; return(0); @@ -133,27 +132,29 @@ static int tuntap_open(void *data) int err, fds[2], len, used; err = tap_open_common(pri->dev, pri->gate_addr); - if(err) return(err); + if(err < 0) + return(err); if(pri->fixed_config){ - if((pri->fd = open("/dev/net/tun", O_RDWR)) < 0){ - printk("Failed to open /dev/net/tun, errno = %d\n", - errno); - return(-errno); + pri->fd = os_open_file("/dev/net/tun", of_rdwr(OPENFLAGS()), 0); + if(pri->fd < 0){ + printk("Failed to open /dev/net/tun, err = %d\n", + -pri->fd); + return(pri->fd); } memset(&ifr, 0, sizeof(ifr)); - ifr.ifr_flags = IFF_TAP; + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name)); if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){ - printk("TUNSETIFF failed, errno = %d", errno); - close(pri->fd); + printk("TUNSETIFF failed, errno = %d\n", errno); + os_close_file(pri->fd); return(-errno); } } else { err = os_pipe(fds, 0, 0); - if(err){ - printk("tuntap_open : os_pipe failed - errno = %d\n", + if(err < 0){ + printk("tuntap_open : os_pipe failed - err = %d\n", -err); return(err); } @@ -166,19 +167,19 @@ static int tuntap_open(void *data) fds[1], buffer, len, &used); output = buffer; - if(err == 0){ - pri->dev_name = uml_strdup(buffer); - output += IFNAMSIZ; - printk(output); - free_output_buffer(buffer); - } - else { - printk(output); + if(err < 0) { + printk("%s", output); free_output_buffer(buffer); - printk("tuntap_open_tramp failed - errno = %d\n", err); - return(-err); + printk("tuntap_open_tramp failed - err = %d\n", -err); + return(err); } - close(fds[0]); + + pri->dev_name = uml_strdup(buffer); + output += IFNAMSIZ; + printk("%s", output); + free_output_buffer(buffer); + + os_close_file(fds[0]); iter_addresses(pri->dev, open_addr, pri->dev_name); } @@ -191,7 +192,7 @@ static void tuntap_close(int fd, void *d if(!pri->fixed_config) iter_addresses(pri->dev, close_addr, pri->dev_name); - close(fd); + os_close_file(fd); pri->fd = -1; } --- linux-2.6.2-rc1/arch/um/os-Linux/file.c 2003-06-14 12:18:51.000000000 -0700 +++ 25/arch/um/os-Linux/file.c 2004-01-21 23:30:29.000000000 -0800 @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -17,33 +19,235 @@ #include "user.h" #include "kern_util.h" -int os_file_type(char *file) +static void copy_stat(struct uml_stat *dst, struct stat64 *src) +{ + *dst = ((struct uml_stat) { + .ust_dev = src->st_dev, /* device */ + .ust_ino = src->st_ino, /* inode */ + .ust_mode = src->st_mode, /* protection */ + .ust_nlink = src->st_nlink, /* number of hard links */ + .ust_uid = src->st_uid, /* user ID of owner */ + .ust_gid = src->st_gid, /* group ID of owner */ + .ust_size = src->st_size, /* total size, in bytes */ + .ust_blksize = src->st_blksize, /* blocksize for filesys I/O */ + .ust_blocks = src->st_blocks, /* number of blocks allocated */ + .ust_atime = src->st_atime, /* time of last access */ + .ust_mtime = src->st_mtime, /* time of last modification */ + .ust_ctime = src->st_ctime, /* time of last change */ + }); +} + +int os_stat_fd(const int fd, struct uml_stat *ubuf) +{ + struct stat64 sbuf; + int err; + + do { + err = fstat64(fd, &sbuf); + } while((err < 0) && (errno == EINTR)) ; + + if(err < 0) + return(-errno); + + if(ubuf != NULL) + copy_stat(ubuf, &sbuf); + return(err); +} + +int os_stat_file(const char *file_name, struct uml_stat *ubuf) +{ + struct stat64 sbuf; + int err; + + do { + err = stat64(file_name, &sbuf); + } while((err < 0) && (errno == EINTR)) ; + + if(err < 0) + return(-errno); + + if(ubuf != NULL) + copy_stat(ubuf, &sbuf); + return(err); +} + +int os_access(const char* file, int mode) +{ + int amode, err; + + amode=(mode&OS_ACC_R_OK ? R_OK : 0) | (mode&OS_ACC_W_OK ? W_OK : 0) | + (mode&OS_ACC_X_OK ? X_OK : 0) | (mode&OS_ACC_F_OK ? F_OK : 0) ; + + err = access(file, amode); + if(err < 0) + return(-errno); + + return(0); +} + +void os_print_error(int error, const char* str) +{ + errno = error < 0 ? -error : error; + + perror(str); +} + +/* FIXME? required only by hostaudio (because it passes ioctls verbatim) */ +int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg) +{ + int err; + + err = ioctl(fd, cmd, arg); + if(err < 0) + return(-errno); + + return(err); +} + +int os_window_size(int fd, int *rows, int *cols) +{ + struct winsize size; + + if(ioctl(fd, TIOCGWINSZ, &size) < 0) + return(-errno); + + *rows = size.ws_row; + *cols = size.ws_col; + + return(0); +} + +int os_new_tty_pgrp(int fd, int pid) { - struct stat64 buf; + if(ioctl(fd, TIOCSCTTY, 0) < 0){ + printk("TIOCSCTTY failed, errno = %d\n", errno); + return(-errno); + } + + if(tcsetpgrp(fd, pid) < 0){ + printk("tcsetpgrp failed, errno = %d\n", errno); + return(-errno); + } + + return(0); +} + +/* FIXME: ensure namebuf in os_get_if_name is big enough */ +int os_get_ifname(int fd, char* namebuf) +{ + if(ioctl(fd, SIOCGIFNAME, namebuf) < 0) + return(-errno); + + return(0); +} + +int os_set_slip(int fd) +{ + int disc, sencap; + + disc = N_SLIP; + if(ioctl(fd, TIOCSETD, &disc) < 0){ + printk("Failed to set slip line discipline - " + "errno = %d\n", errno); + return(-errno); + } + + sencap = 0; + if(ioctl(fd, SIOCSIFENCAP, &sencap) < 0){ + printk("Failed to set slip encapsulation - " + "errno = %d\n", errno); + return(-errno); + } + + return(0); +} + +int os_set_owner(int fd, int pid) +{ + if(fcntl(fd, F_SETOWN, pid) < 0){ + int save_errno = errno; + + if(fcntl(fd, F_GETOWN, 0) != pid) + return(-save_errno); + } + + return(0); +} + +/* FIXME? moved wholesale from sigio_user.c to get fcntls out of that file */ +int os_sigio_async(int master, int slave) +{ + int flags; - if(stat64(file, &buf) == -1) + flags = fcntl(master, F_GETFL); + if(flags < 0) { + printk("fcntl F_GETFL failed, errno = %d\n", errno); return(-errno); + } + + if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || + (fcntl(master, F_SETOWN, os_getpid()) < 0)){ + printk("fcntl F_SETFL or F_SETOWN failed, errno = %d\n", errno); + return(-errno); + } + + if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)){ + printk("fcntl F_SETFL failed, errno = %d\n", errno); + return(-errno); + } - if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); - else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); - else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); - else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); - else if(S_ISFIFO(buf.st_mode)) return(OS_TYPE_FIFO); - else if(S_ISSOCK(buf.st_mode)) return(OS_TYPE_SOCK); + return(0); +} + +int os_mode_fd(int fd, int mode) +{ + int err; + + do { + err = fchmod(fd, mode); + } while((err < 0) && (errno==EINTR)) ; + + if(err < 0) + return(-errno); + + return(0); +} + +int os_file_type(char *file) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if(err < 0) + return(err); + + if(S_ISDIR(buf.ust_mode)) return(OS_TYPE_DIR); + else if(S_ISLNK(buf.ust_mode)) return(OS_TYPE_SYMLINK); + else if(S_ISCHR(buf.ust_mode)) return(OS_TYPE_CHARDEV); + else if(S_ISBLK(buf.ust_mode)) return(OS_TYPE_BLOCKDEV); + else if(S_ISFIFO(buf.ust_mode)) return(OS_TYPE_FIFO); + else if(S_ISSOCK(buf.ust_mode)) return(OS_TYPE_SOCK); else return(OS_TYPE_FILE); } int os_file_mode(char *file, struct openflags *mode_out) { + int err; + *mode_out = OPENFLAGS(); - if(!access(file, W_OK)) *mode_out = of_write(*mode_out); - else if(errno != EACCES) - return(-errno); + err = os_access(file, OS_ACC_W_OK); + if((err < 0) && (err != -EACCES)) + return(err); - if(!access(file, R_OK)) *mode_out = of_read(*mode_out); - else if(errno != EACCES) - return(-errno); + *mode_out = of_write(*mode_out); + + err = os_access(file, OS_ACC_R_OK); + if((err < 0) && (err != -EACCES)) + return(err); + + *mode_out = of_read(*mode_out); return(0); } @@ -63,16 +267,14 @@ int os_open_file(char *file, struct open if(flags.e) f |= O_EXCL; fd = open64(file, f, mode); - if(fd < 0) return(-errno); - - if(flags.cl){ - if(fcntl(fd, F_SETFD, 1)){ - close(fd); - return(-errno); - } + if(fd < 0) + return(-errno); + + if(flags.cl && fcntl(fd, F_SETFD, 1)){ + os_close_file(fd); + return(-errno); } - return(fd); return(fd); } @@ -90,7 +292,7 @@ int os_connect_socket(char *name) err = connect(fd, (struct sockaddr *) &sock, sizeof(sock)); if(err) - return(err); + return(-errno); return(fd); } @@ -109,88 +311,162 @@ int os_seek_file(int fd, __u64 offset) return(0); } -int os_read_file(int fd, void *buf, int len) +static int fault_buffer(void *start, int len, + int (*copy_proc)(void *addr, void *buf, int len)) { - int n; + int page = getpagesize(), i; + char c; - /* Force buf into memory if it's not already. */ + for(i = 0; i < len; i += page){ + if((*copy_proc)(start + i, &c, sizeof(c))) + return(-EFAULT); + } + if((len % page) != 0){ + if((*copy_proc)(start + len - 1, &c, sizeof(c))) + return(-EFAULT); + } + return(0); +} - /* XXX This fails if buf is kernel memory */ -#ifdef notdef - if(copy_to_user_proc(buf, &c, sizeof(c))) - return(-EFAULT); -#endif +static int file_io(int fd, void *buf, int len, + int (*io_proc)(int fd, void *buf, int len), + int (*copy_user_proc)(void *addr, void *buf, int len)) +{ + int n, err; + + do { + n = (*io_proc)(fd, buf, len); + if((n < 0) && (errno == EFAULT)){ + err = fault_buffer(buf, len, copy_user_proc); + if(err) + return(err); + n = (*io_proc)(fd, buf, len); + } + } while((n < 0) && (errno == EINTR)); - n = read(fd, buf, len); if(n < 0) return(-errno); return(n); } -int os_write_file(int fd, void *buf, int count) +int os_read_file(int fd, void *buf, int len) { - int n; - - /* Force buf into memory if it's not already. */ - - /* XXX This fails if buf is kernel memory */ -#ifdef notdef - if(copy_to_user_proc(buf, buf, buf[0])) - return(-EFAULT); -#endif + return(file_io(fd, buf, len, (int (*)(int, void *, int)) read, + copy_from_user_proc)); +} - n = write(fd, buf, count); - if(n < 0) - return(-errno); - return(n); +int os_write_file(int fd, const void *buf, int len) +{ + return(file_io(fd, (void *) buf, len, + (int (*)(int, void *, int)) write, copy_to_user_proc)); } int os_file_size(char *file, long long *size_out) { - struct stat64 buf; + struct uml_stat buf; + int err; - if(stat64(file, &buf) == -1){ - printk("Couldn't stat \"%s\" : errno = %d\n", file, errno); - return(-errno); + err = os_stat_file(file, &buf); + if(err < 0){ + printk("Couldn't stat \"%s\" : err = %d\n", file, -err); + return(err); } - if(S_ISBLK(buf.st_mode)){ + + if(S_ISBLK(buf.ust_mode)){ int fd, blocks; - if((fd = open64(file, O_RDONLY)) < 0){ - printk("Couldn't open \"%s\", errno = %d\n", file, - errno); - return(-errno); + fd = os_open_file(file, of_read(OPENFLAGS()), 0); + if(fd < 0){ + printk("Couldn't open \"%s\", errno = %d\n", file, -fd); + return(fd); } if(ioctl(fd, BLKGETSIZE, &blocks) < 0){ printk("Couldn't get the block size of \"%s\", " "errno = %d\n", file, errno); - close(fd); - return(-errno); + err = -errno; + os_close_file(fd); + return(err); } *size_out = ((long long) blocks) * 512; - close(fd); + os_close_file(fd); return(0); } - *size_out = buf.st_size; + *size_out = buf.ust_size; + return(0); +} + +int os_file_modtime(char *file, unsigned long *modtime) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if(err < 0){ + printk("Couldn't stat \"%s\" : err = %d\n", file, -err); + return(err); + } + + *modtime = buf.ust_mtime; return(0); } +int os_get_exec_close(int fd, int* close_on_exec) +{ + int ret; + + do { + ret = fcntl(fd, F_GETFD); + } while((ret < 0) && (errno == EINTR)) ; + + if(ret < 0) + return(-errno); + + *close_on_exec = (ret&FD_CLOEXEC) ? 1 : 0; + return(ret); +} + +int os_set_exec_close(int fd, int close_on_exec) +{ + int flag, err; + + if(close_on_exec) flag = FD_CLOEXEC; + else flag = 0; + + do { + err = fcntl(fd, F_SETFD, flag); + } while((err < 0) && (errno == EINTR)) ; + + if(err < 0) + return(-errno); + return(err); +} + int os_pipe(int *fds, int stream, int close_on_exec) { int err, type = stream ? SOCK_STREAM : SOCK_DGRAM; err = socketpair(AF_UNIX, type, 0, fds); - if(err) + if(err < 0) return(-errno); if(!close_on_exec) return(0); - if((fcntl(fds[0], F_SETFD, 1) < 0) || (fcntl(fds[1], F_SETFD, 1) < 0)) - printk("os_pipe : Setting FD_CLOEXEC failed, errno = %d", - errno); + err = os_set_exec_close(fds[0], 1); + if(err < 0) + goto error; + + err = os_set_exec_close(fds[1], 1); + if(err < 0) + goto error; return(0); + + error: + printk("os_pipe : Setting FD_CLOEXEC failed, err = %d\n", -err); + os_close_file(fds[1]); + os_close_file(fds[0]); + return(err); } int os_set_fd_async(int fd, int owner) @@ -270,7 +546,7 @@ int os_shutdown_socket(int fd, int r, in return(-EINVAL); } err = shutdown(fd, what); - if(err) + if(err < 0) return(-errno); return(0); } @@ -315,7 +591,7 @@ int os_rcv_fd(int fd, int *helper_pid_ou return(new); } -int create_unix_socket(char *file, int len) +int os_create_unix_socket(char *file, int len, int close_on_exec) { struct sockaddr_un addr; int sock, err; @@ -327,6 +603,13 @@ int create_unix_socket(char *file, int l return(-errno); } + if(close_on_exec) { + err = os_set_exec_close(sock, 1); + if(err < 0) + printk("create_unix_socket : close_on_exec failed, " + "err = %d", -err); + } + addr.sun_family = AF_UNIX; /* XXX Be more careful about overflow */ @@ -334,14 +617,45 @@ int create_unix_socket(char *file, int l err = bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (err < 0){ - printk("create_listening_socket - bind failed, errno = %d\n", - errno); + printk("create_listening_socket at '%s' - bind failed, " + "errno = %d\n", file, errno); return(-errno); } return(sock); } +void os_flush_stdout(void) +{ + fflush(stdout); +} + +int os_lock_file(int fd, int excl) +{ + int type = excl ? F_WRLCK : F_RDLCK; + struct flock lock = ((struct flock) { .l_type = type, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0 } ); + int err, save; + + err = fcntl(fd, F_SETLK, &lock); + if(!err) + goto out; + + save = -errno; + err = fcntl(fd, F_GETLK, &lock); + if(err){ + err = -errno; + goto out; + } + + printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid); + err = save; + out: + return(err); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- linux-2.6.2-rc1/arch/um/os-Linux/process.c 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/um/os-Linux/process.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -7,32 +7,37 @@ #include #include #include +#include #include #include #include "os.h" #include "user.h" +#define ARBITRARY_ADDR -1 +#define FAILURE_PID -1 + unsigned long os_process_pc(int pid) { char proc_stat[sizeof("/proc/#####/stat\0")], buf[256]; unsigned long pc; - int fd; + int fd, err; sprintf(proc_stat, "/proc/%d/stat", pid); fd = os_open_file(proc_stat, of_read(OPENFLAGS()), 0); if(fd < 0){ - printk("os_process_pc - couldn't open '%s', errno = %d\n", - proc_stat, errno); - return(-1); + printk("os_process_pc - couldn't open '%s', err = %d\n", + proc_stat, -fd); + return(ARBITRARY_ADDR); } - if(read(fd, buf, sizeof(buf)) < 0){ - printk("os_process_pc - couldn't read '%s', errno = %d\n", - proc_stat, errno); - close(fd); - return(-1); + err = os_read_file(fd, buf, sizeof(buf)); + if(err < 0){ + printk("os_process_pc - couldn't read '%s', err = %d\n", + proc_stat, -err); + os_close_file(fd); + return(ARBITRARY_ADDR); } - close(fd); - pc = -1; + os_close_file(fd); + pc = ARBITRARY_ADDR; if(sscanf(buf, "%*d %*s %*c %*d %*d %*d %*d %*d %*d %*d %*d " "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " "%*d %*d %*d %*d %ld", &pc) != 1){ @@ -52,22 +57,23 @@ int os_process_parent(int pid) snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); fd = os_open_file(stat, of_read(OPENFLAGS()), 0); if(fd < 0){ - printk("Couldn't open '%s', errno = %d\n", stat, -fd); - return(-1); + printk("Couldn't open '%s', err = %d\n", stat, -fd); + return(FAILURE_PID); } - n = read(fd, data, sizeof(data)); - close(fd); + n = os_read_file(fd, data, sizeof(data)); + os_close_file(fd); if(n < 0){ - printk("Couldn't read '%s', errno = %d\n", stat); - return(-1); + printk("Couldn't read '%s', err = %d\n", stat, -n); + return(FAILURE_PID); } - parent = -1; + parent = FAILURE_PID; /* XXX This will break if there is a space in the command */ n = sscanf(data, "%*d %*s %*c %d", &parent); - if(n != 1) printk("Failed to scan '%s'\n", data); + if(n != 1) + printk("Failed to scan '%s'\n", data); return(parent); } @@ -87,7 +93,8 @@ void os_kill_process(int pid, int reap_c void os_usr1_process(int pid) { - kill(pid, SIGUSR1); + syscall(__NR_tkill, pid, SIGUSR1); + /* kill(pid, SIGUSR1); */ } int os_getpid(void) @@ -95,7 +102,7 @@ int os_getpid(void) return(getpid()); } -int os_map_memory(void *virt, int fd, unsigned long off, unsigned long len, +int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x) { void *loc; @@ -104,8 +111,8 @@ int os_map_memory(void *virt, int fd, un prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | (x ? PROT_EXEC : 0); - loc = mmap((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, - fd, off); + loc = mmap64((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, + fd, off); if(loc == MAP_FAILED) return(-errno); return(0); @@ -126,7 +133,8 @@ int os_unmap_memory(void *addr, int len) int err; err = munmap(addr, len); - if(err < 0) return(-errno); + if(err < 0) + return(-errno); return(0); } --- linux-2.6.2-rc1/arch/um/os-Linux/tty.c 2003-06-14 12:18:23.000000000 -0700 +++ 25/arch/um/os-Linux/tty.c 2004-01-21 23:30:29.000000000 -0800 @@ -28,10 +28,10 @@ int get_pty(void) struct grantpt_info info; int fd; - if((fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0)) < 0){ - printk("get_pty : Couldn't open /dev/ptmx - errno = %d\n", - errno); - return(-1); + fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0); + if(fd < 0){ + printk("get_pty : Couldn't open /dev/ptmx - err = %d\n", -fd); + return(fd); } info.fd = fd; @@ -39,7 +39,7 @@ int get_pty(void) if(info.res < 0){ printk("get_pty : Couldn't grant pty - errno = %d\n", - info.err); + -info.err); return(-1); } if(unlockpt(fd) < 0){ --- linux-2.6.2-rc1/arch/um/sys-i386/bugs.c 2003-06-14 12:18:29.000000000 -0700 +++ 25/arch/um/sys-i386/bugs.c 2004-01-21 23:30:29.000000000 -0800 @@ -4,20 +4,21 @@ */ #include -#include #include #include #include +#include #include "kern_util.h" #include "user.h" #include "sysdep/ptrace.h" #include "task.h" +#include "os.h" #define MAXTOKEN 64 /* Set during early boot */ -int cpu_has_cmov = 1; -int cpu_has_xmm = 0; +int host_has_cmov = 1; +int host_has_xmm = 0; static char token(int fd, char *buf, int len, char stop) { @@ -27,13 +28,15 @@ static char token(int fd, char *buf, int ptr = buf; end = &buf[len]; do { - n = read(fd, ptr, sizeof(*ptr)); + n = os_read_file(fd, ptr, sizeof(*ptr)); c = *ptr++; - if(n == 0) return(0); - else if(n != sizeof(*ptr)){ - printk("Reading /proc/cpuinfo failed, " - "errno = %d\n", errno); - return(-errno); + if(n != sizeof(*ptr)){ + if(n == 0) return(0); + printk("Reading /proc/cpuinfo failed, err = %d\n", -n); + if(n < 0) + return(n); + else + return(-EIO); } } while((c != '\n') && (c != stop) && (ptr < end)); @@ -45,45 +48,79 @@ static char token(int fd, char *buf, int return(c); } -static int check_cpu_feature(char *feature, int *have_it) +static int find_cpuinfo_line(int fd, char *key, char *scratch, int len) { - char buf[MAXTOKEN], c; - int fd, len = sizeof(buf)/sizeof(buf[0]), n; - - printk("Checking for host processor %s support...", feature); - fd = open("/proc/cpuinfo", O_RDONLY); - if(fd < 0){ - printk("Couldn't open /proc/cpuinfo, errno = %d\n", errno); - return(0); - } + int n; + char c; - *have_it = 0; - buf[len - 1] = '\0'; + scratch[len - 1] = '\0'; while(1){ - c = token(fd, buf, len - 1, ':'); - if(c <= 0) goto out; + c = token(fd, scratch, len - 1, ':'); + if(c <= 0) + return(0); else if(c != ':'){ printk("Failed to find ':' in /proc/cpuinfo\n"); - goto out; + return(0); } - if(!strncmp(buf, "flags", strlen("flags"))) break; + if(!strncmp(scratch, key, strlen(key))) + return(1); do { - n = read(fd, &c, sizeof(c)); + n = os_read_file(fd, &c, sizeof(c)); if(n != sizeof(c)){ printk("Failed to find newline in " - "/proc/cpuinfo, n = %d, errno = %d\n", - n, errno); - goto out; + "/proc/cpuinfo, err = %d\n", -n); + return(0); } } while(c != '\n'); } + return(0); +} + +int cpu_feature(char *what, char *buf, int len) +{ + int fd, ret = 0; + + fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); + if(fd < 0){ + printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); + return(0); + } + + if(!find_cpuinfo_line(fd, what, buf, len)){ + printk("Couldn't find '%s' line in /proc/cpuinfo\n", what); + goto out_close; + } + + token(fd, buf, len, '\n'); + ret = 1; + + out_close: + os_close_file(fd); + return(ret); +} + +static int check_cpu_flag(char *feature, int *have_it) +{ + char buf[MAXTOKEN], c; + int fd, len = sizeof(buf)/sizeof(buf[0]); + + printk("Checking for host processor %s support...", feature); + fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); + if(fd < 0){ + printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); + return(0); + } + + *have_it = 0; + if(!find_cpuinfo_line(fd, "flags", buf, sizeof(buf) / sizeof(buf[0]))) + goto out; c = token(fd, buf, len - 1, ' '); if(c < 0) goto out; else if(c != ' '){ - printk("Failed to find ':' in /proc/cpuinfo\n"); + printk("Failed to find ' ' in /proc/cpuinfo\n"); goto out; } @@ -100,21 +137,48 @@ static int check_cpu_feature(char *featu out: if(*have_it == 0) printk("No\n"); else if(*have_it == 1) printk("Yes\n"); - close(fd); + os_close_file(fd); return(1); } +#if 0 /* This doesn't work in tt mode, plus it's causing compilation problems + * for some people. + */ +static void disable_lcall(void) +{ + struct modify_ldt_ldt_s ldt; + int err; + + bzero(&ldt, sizeof(ldt)); + ldt.entry_number = 7; + ldt.base_addr = 0; + ldt.limit = 0; + err = modify_ldt(1, &ldt, sizeof(ldt)); + if(err) + printk("Failed to disable lcall7 - errno = %d\n", errno); +} +#endif + +void arch_init_thread(void) +{ +#if 0 + disable_lcall(); +#endif +} + void arch_check_bugs(void) { int have_it; - if(access("/proc/cpuinfo", R_OK)){ + if(os_access("/proc/cpuinfo", OS_ACC_R_OK) < 0){ printk("/proc/cpuinfo not available - skipping CPU capability " "checks\n"); return; } - if(check_cpu_feature("cmov", &have_it)) cpu_has_cmov = have_it; - if(check_cpu_feature("xmm", &have_it)) cpu_has_xmm = have_it; + if(check_cpu_flag("cmov", &have_it)) + host_has_cmov = have_it; + if(check_cpu_flag("xmm", &have_it)) + host_has_xmm = have_it; } int arch_handle_signal(int sig, union uml_pt_regs *regs) @@ -130,18 +194,18 @@ int arch_handle_signal(int sig, union um if((*((char *) ip) != 0x0f) || ((*((char *) (ip + 1)) & 0xf0) != 0x40)) return(0); - if(cpu_has_cmov == 0) + if(host_has_cmov == 0) panic("SIGILL caused by cmov, which this processor doesn't " "implement, boot a filesystem compiled for older " "processors"); - else if(cpu_has_cmov == 1) + else if(host_has_cmov == 1) panic("SIGILL caused by cmov, which this processor claims to " "implement"); - else if(cpu_has_cmov == -1) + else if(host_has_cmov == -1) panic("SIGILL caused by cmov, couldn't tell if this processor " "implements it, boot a filesystem compiled for older " "processors"); - else panic("Bad value for cpu_has_cmov (%d)", cpu_has_cmov); + else panic("Bad value for host_has_cmov (%d)", host_has_cmov); return(0); } --- linux-2.6.2-rc1/arch/um/sys-i386/Makefile 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/sys-i386/Makefile 2004-01-21 23:30:29.000000000 -0800 @@ -1,7 +1,8 @@ -obj-y = bugs.o checksum.o extable.o fault.o ksyms.o ldt.o module.o \ - ptrace.o ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o +obj-y = bugs.o checksum.o fault.o ksyms.o ldt.o ptrace.o \ + ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o time.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_MODULES) += module.o USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) @@ -9,6 +10,8 @@ USER_OBJS := $(foreach file,$(USER_OBJS) SYMLINKS = semaphore.c highmem.c module.c SYMLINKS := $(foreach f,$(SYMLINKS),$(src)/$f) +clean-files := $(SYMLINKS) + semaphore.c-dir = kernel highmem.c-dir = mm module.c-dir = kernel @@ -24,8 +27,7 @@ $(USER_OBJS) : %.o: %.c $(SYMLINKS): $(call make_link,$@) -clean: - $(MAKE) -C util clean +subdir- := util fastdep: --- linux-2.6.2-rc1/arch/um/sys-i386/ptrace_user.c 2003-06-14 12:18:04.000000000 -0700 +++ 25/arch/um/sys-i386/ptrace_user.c 2004-01-21 23:30:29.000000000 -0800 @@ -39,10 +39,10 @@ static void write_debugregs(int pid, uns nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); for(i = 0; i < nregs; i++){ if((i == 4) || (i == 5)) continue; - if(ptrace(PTRACE_POKEUSR, pid, &dummy->u_debugreg[i], + if(ptrace(PTRACE_POKEUSER, pid, &dummy->u_debugreg[i], regs[i]) < 0) - printk("write_debugregs - ptrace failed, " - "errno = %d\n", errno); + printk("write_debugregs - ptrace failed on " + "register %d, errno = %d\n", errno); } } @@ -54,7 +54,7 @@ static void read_debugregs(int pid, unsi dummy = NULL; nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); for(i = 0; i < nregs; i++){ - regs[i] = ptrace(PTRACE_PEEKUSR, pid, + regs[i] = ptrace(PTRACE_PEEKUSER, pid, &dummy->u_debugreg[i], 0); } } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/um/sys-i386/time.c 2004-01-21 23:30:29.000000000 -0800 @@ -0,0 +1,24 @@ +/* + * sys-i386/time.c + * Created 25.9.2002 Sapan Bhatia + * + */ + +unsigned long long time_stamp(void) +{ + unsigned long low, high; + + asm("rdtsc" : "=a" (low), "=d" (high)); + return((((unsigned long long) high) << 32) + low); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ --- linux-2.6.2-rc1/arch/um/sys-i386/util/Makefile 2003-06-14 12:18:35.000000000 -0700 +++ 25/arch/um/sys-i386/util/Makefile 2004-01-21 23:30:29.000000000 -0800 @@ -6,10 +6,10 @@ targets := mk_thread_kern.o mk_thread_u mk_sc-objs := mk_sc.o $(obj)/mk_thread : $(obj)/mk_thread_kern.o $(obj)/mk_thread_user.o - $(CC) $(CFLAGS) -o $@ $^ + $(HOSTCC) $(CFLAGS) -o $@ $^ $(obj)/mk_thread_user.o : $(src)/mk_thread_user.c - $(CC) $(USER_CFLAGS) -c -o $@ $< + $(HOSTCC) $(USER_CFLAGS) -c -o $@ $< clean : $(RM) -f $(build-targets) --- linux-2.6.2-rc1/arch/um/sys-i386/util/mk_sc.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/arch/um/sys-i386/util/mk_sc.c 2004-01-21 23:30:29.000000000 -0800 @@ -38,6 +38,7 @@ int main(int argc, char **argv) SC_OFFSET("SC_ERR", err); SC_OFFSET("SC_CR2", cr2); SC_OFFSET("SC_FPSTATE", fpstate); + SC_OFFSET("SC_SIGMASK", oldmask); SC_FP_OFFSET("SC_FP_CW", cw); SC_FP_OFFSET("SC_FP_SW", sw); SC_FP_OFFSET("SC_FP_TAG", tag); --- linux-2.6.2-rc1/arch/um/uml.lds.S 2003-06-14 12:18:09.000000000 -0700 +++ 25/arch/um/uml.lds.S 2004-01-21 23:30:29.000000000 -0800 @@ -9,7 +9,6 @@ SECTIONS { . = START + SIZEOF_HEADERS; - . = ALIGN(4096); __binary_start = .; #ifdef MODE_TT .thread_private : { @@ -26,7 +25,11 @@ SECTIONS . = ALIGN(4096); /* Init code and data */ _stext = .; __init_begin = .; - .text.init : { *(.text.init) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } . = ALIGN(4096); .text : { @@ -38,7 +41,7 @@ SECTIONS #include "asm/common.lds.S" - .data.init : { *(.data.init) } + init.data : { *(init.data) } .data : { . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ --- linux-2.6.2-rc1/arch/um/util/Makefile 2003-06-14 12:18:51.000000000 -0700 +++ 25/arch/um/util/Makefile 2004-01-21 23:30:29.000000000 -0800 @@ -3,19 +3,19 @@ targets := mk_task_user.o mk_task_kern. mk_constants_user.o mk_constants_kern.o $(obj)/mk_task: $(obj)/mk_task_user.o $(obj)/mk_task_kern.o - $(CC) -o $@ $^ + $(HOSTCC) -o $@ $^ $(obj)/mk_task_user.o: $(src)/mk_task_user.c - $(CC) -o $@ -c $< + $(HOSTCC) -o $@ -c $< $(obj)/mk_constants : $(obj)/mk_constants_user.o $(obj)/mk_constants_kern.o - $(CC) -o $@ $^ + $(HOSTCC) -o $@ $^ $(obj)/mk_constants_user.o : $(src)/mk_constants_user.c - $(CC) -c $< -o $@ + $(HOSTCC) -c $< -o $@ $(obj)/mk_constants_kern.o : $(src)/mk_constants_kern.c - $(CC) $(CFLAGS) -c $< -o $@ + $(HOSTCC) $(CFLAGS) -c $< -o $@ clean: $(RM) $(build-targets) --- linux-2.6.2-rc1/arch/um/util/mk_constants_kern.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/um/util/mk_constants_kern.c 2004-01-21 23:30:29.000000000 -0800 @@ -1,5 +1,6 @@ #include "linux/kernel.h" #include "linux/stringify.h" +#include "linux/time.h" #include "asm/page.h" extern void print_head(void); @@ -11,6 +12,7 @@ int main(int argc, char **argv) { print_head(); print_constant_int("UM_KERN_PAGE_SIZE", PAGE_SIZE); + print_constant_str("UM_KERN_EMERG", KERN_EMERG); print_constant_str("UM_KERN_ALERT", KERN_ALERT); print_constant_str("UM_KERN_CRIT", KERN_CRIT); @@ -19,6 +21,8 @@ int main(int argc, char **argv) print_constant_str("UM_KERN_NOTICE", KERN_NOTICE); print_constant_str("UM_KERN_INFO", KERN_INFO); print_constant_str("UM_KERN_DEBUG", KERN_DEBUG); + + print_constant_int("UM_NSEC_PER_SEC", NSEC_PER_SEC); print_tail(); return(0); } --- linux-2.6.2-rc1/arch/x86_64/boot/compressed/head.S 2003-11-09 16:45:05.000000000 -0800 +++ 25/arch/x86_64/boot/compressed/head.S 2004-01-21 23:26:58.000000000 -0800 @@ -26,6 +26,7 @@ .code32 .text +#define IN_BOOTLOADER #include #include --- linux-2.6.2-rc1/arch/x86_64/boot/compressed/misc.c 2003-11-09 16:45:05.000000000 -0800 +++ 25/arch/x86_64/boot/compressed/misc.c 2004-01-21 23:26:58.000000000 -0800 @@ -9,6 +9,7 @@ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 */ +#define IN_BOOTLOADER #include "miscsetup.h" #include --- linux-2.6.2-rc1/arch/x86_64/defconfig 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/defconfig 2004-01-21 23:30:01.000000000 -0800 @@ -9,6 +9,7 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_X86_CMPXCHG=y CONFIG_EARLY_PRINTK=y CONFIG_HPET_TIMER=y +CONFIG_HPET_EMULATE_RTC=y CONFIG_GENERIC_ISA_DMA=y # @@ -26,7 +27,8 @@ CONFIG_SYSVIPC=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y CONFIG_LOG_BUF_SHIFT=18 -# CONFIG_IKCONFIG is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_FUTEX=y @@ -34,6 +36,7 @@ CONFIG_EPOLL=y CONFIG_IOSCHED_NOOP=y CONFIG_IOSCHED_AS=y CONFIG_IOSCHED_DEADLINE=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set # # Loadable module support @@ -59,7 +62,6 @@ CONFIG_X86_CPUID=y CONFIG_X86_IO_APIC=y CONFIG_X86_LOCAL_APIC=y CONFIG_MTRR=y -# CONFIG_HUGETLB_PAGE is not set CONFIG_SMP=y # CONFIG_PREEMPT is not set CONFIG_K8_NUMA=y @@ -79,9 +81,9 @@ CONFIG_SOFTWARE_SUSPEND=y # # ACPI (Advanced Configuration and Power Interface) Support # -# CONFIG_ACPI_HT is not set CONFIG_ACPI=y CONFIG_ACPI_BOOT=y +CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_SLEEP=y CONFIG_ACPI_SLEEP_PROC_FS=y CONFIG_ACPI_AC=y @@ -94,17 +96,23 @@ CONFIG_ACPI_THERMAL=y CONFIG_ACPI_TOSHIBA=y CONFIG_ACPI_DEBUG=y CONFIG_ACPI_BUS=y -CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y CONFIG_ACPI_PCI=y CONFIG_ACPI_SYSTEM=y +# CONFIG_ACPI_RELAXED_AML is not set + +# +# CPU Frequency scaling +# +# CONFIG_CPU_FREQ is not set # # Bus options (PCI etc.) # CONFIG_PCI=y CONFIG_PCI_DIRECT=y +# CONFIG_PCI_USE_VECTOR is not set # CONFIG_PCI_LEGACY_PROC is not set # CONFIG_PCI_NAMES is not set # CONFIG_HOTPLUG is not set @@ -115,6 +123,7 @@ CONFIG_PCI_DIRECT=y CONFIG_BINFMT_ELF=y # CONFIG_BINFMT_MISC is not set CONFIG_IA32_EMULATION=y +# CONFIG_IA32_AOUT is not set CONFIG_COMPAT=y CONFIG_UID16=y @@ -179,7 +188,6 @@ CONFIG_BLK_DEV_IDEPCI=y # CONFIG_BLK_DEV_OPTI621 is not set # CONFIG_BLK_DEV_RZ1000 is not set CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_IDE_TCQ is not set # CONFIG_BLK_DEV_IDEDMA_FORCED is not set CONFIG_IDEDMA_PCI_AUTO=y # CONFIG_IDEDMA_ONLYDISK is not set @@ -246,6 +254,7 @@ CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_AIC79XX is not set # CONFIG_SCSI_ADVANSYS is not set # CONFIG_SCSI_MEGARAID is not set +# CONFIG_SCSI_SATA is not set # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_CPQFCTS is not set # CONFIG_SCSI_DMX3191D is not set @@ -260,7 +269,7 @@ CONFIG_SCSI_IPS=m # CONFIG_SCSI_QLOGIC_FC is not set # CONFIG_SCSI_QLOGIC_1280 is not set # CONFIG_SCSI_DC395x is not set -# CONFIG_SCSI_NSP32 is not set +# CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set # @@ -313,7 +322,12 @@ CONFIG_IP_MULTICAST=y # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set -# CONFIG_IPV6 is not set +CONFIG_IPV6=y +# CONFIG_IPV6_PRIVACY is not set +# CONFIG_INET6_AH is not set +# CONFIG_INET6_ESP is not set +# CONFIG_INET6_IPCOMP is not set +# CONFIG_IPV6_TUNNEL is not set # CONFIG_DECNET is not set # CONFIG_BRIDGE is not set # CONFIG_NETFILTER is not set @@ -325,7 +339,9 @@ CONFIG_IPV6_SCTP__=y # CONFIG_IP_SCTP is not set # CONFIG_ATM is not set # CONFIG_VLAN_8021Q is not set -# CONFIG_LLC is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set # CONFIG_NET_DIVERT is not set @@ -358,7 +374,7 @@ CONFIG_NETDEVICES=y # Ethernet (10 or 100Mbit) # CONFIG_NET_ETHERNET=y -# CONFIG_MII is not set +CONFIG_MII=y # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set @@ -388,7 +404,6 @@ CONFIG_8139TOO=m # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set -# CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set # @@ -421,10 +436,10 @@ CONFIG_TIGON3=y # CONFIG_NET_RADIO is not set # -# Token Ring devices (depends on LLC=y) +# Token Ring devices # +# CONFIG_TR is not set # CONFIG_NET_FC is not set -# CONFIG_RCPCI is not set # CONFIG_SHAPER is not set # @@ -443,6 +458,11 @@ CONFIG_TIGON3=y # CONFIG_IRDA is not set # +# Bluetooth support +# +# CONFIG_BT is not set + +# # ISDN subsystem # # CONFIG_ISDN_BOOL is not set @@ -504,6 +524,7 @@ CONFIG_HW_CONSOLE=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_SERIAL_8250_ACPI is not set +CONFIG_SERIAL_8250_NR_UARTS=4 # CONFIG_SERIAL_8250_EXTENDED is not set # @@ -520,7 +541,11 @@ CONFIG_UNIX98_PTY_COUNT=256 # CONFIG_I2C is not set # -# I2C Hardware Sensors Mainboard support +# I2C Algorithms +# + +# +# I2C Hardware Bus support # # @@ -549,7 +574,6 @@ CONFIG_RTC=y # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set -# CONFIG_SONYPI is not set # # Ftape, the floppy tape device driver @@ -559,6 +583,7 @@ CONFIG_AGP_AMD64=y # CONFIG_DRM is not set # CONFIG_MWAVE is not set CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=256 CONFIG_HANGCHECK_TIMER=y # @@ -619,10 +644,13 @@ CONFIG_ISO9660_FS=y # Pseudo filesystems # CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y # CONFIG_DEVFS_FS is not set CONFIG_DEVPTS_FS=y # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y +# CONFIG_HUGETLBFS is not set +# CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y # @@ -647,6 +675,7 @@ CONFIG_RAMFS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y # CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set CONFIG_NFSD=y CONFIG_NFSD_V3=y # CONFIG_NFSD_V4 is not set @@ -670,6 +699,11 @@ CONFIG_SUNRPC=y CONFIG_MSDOS_PARTITION=y # +# Native Language Support +# +# CONFIG_NLS is not set + +# # Graphics support # # CONFIG_FB is not set @@ -707,13 +741,15 @@ CONFIG_SOUND_PRIME=y # CONFIG_SOUND_MAESTRO is not set # CONFIG_SOUND_MAESTRO3 is not set CONFIG_SOUND_ICH=y -# CONFIG_SOUND_RME96XX is not set # CONFIG_SOUND_SONICVIBES is not set # CONFIG_SOUND_TRIDENT is not set +# CONFIG_SOUND_MSNDCLAS is not set +# CONFIG_SOUND_MSNDPIN is not set # CONFIG_SOUND_VIA82CXXX is not set # CONFIG_SOUND_OSS is not set # CONFIG_SOUND_ALI5455 is not set # CONFIG_SOUND_FORTE is not set +# CONFIG_SOUND_RME96XX is not set # CONFIG_SOUND_AD1980 is not set # @@ -723,11 +759,6 @@ CONFIG_SOUND_ICH=y # CONFIG_USB_GADGET is not set # -# Bluetooth support -# -# CONFIG_BT is not set - -# # Profiling support # CONFIG_PROFILING=y @@ -744,8 +775,6 @@ CONFIG_MAGIC_SYSRQ=y # CONFIG_DEBUG_INFO is not set # CONFIG_FRAME_POINTER is not set # CONFIG_IOMMU_DEBUG is not set -CONFIG_IOMMU_LEAK=y -CONFIG_MCE_DEBUG=y # # Security options @@ -760,4 +789,4 @@ CONFIG_MCE_DEBUG=y # # Library routines # -# CONFIG_CRC32 is not set +CONFIG_CRC32=y --- linux-2.6.2-rc1/arch/x86_64/ia32/ia32entry.S 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/ia32/ia32entry.S 2004-01-21 23:26:59.000000000 -0800 @@ -4,6 +4,7 @@ * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ +#include #include #include #include @@ -45,6 +46,7 @@ * with the int 0x80 path. */ ENTRY(ia32_cstar_target) + CFI_STARTPROC swapgs movl %esp,%r8d movq %gs:pda_kernelstack,%rsp @@ -105,6 +107,7 @@ cstar_tracesys: cstar_badarg: movq $-EFAULT,%rax jmp cstar_sysret + CFI_ENDPROC /* * Emulated IA32 system calls via int 0x80. @@ -128,6 +131,7 @@ cstar_badarg: */ ENTRY(ia32_syscall) + CFI_STARTPROC swapgs sti movl %eax,%eax @@ -168,6 +172,7 @@ ni_syscall: quiet_ni_syscall: movq $-ENOSYS,%rax ret + CFI_ENDPROC .macro PTREGSCALL label, func .globl \label @@ -188,6 +193,7 @@ quiet_ni_syscall: PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend ENTRY(ia32_ptregs_common) + CFI_STARTPROC popq %r11 SAVE_REST movq %r11, %r15 @@ -198,6 +204,7 @@ ENTRY(ia32_ptregs_common) je int_ret_from_sys_call /* misbalances the call/ret stack. sorry */ pushq %r11 ret + CFI_ENDPROC .data .align 8 --- linux-2.6.2-rc1/arch/x86_64/ia32/ia32_signal.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/ia32/ia32_signal.c 2004-01-21 23:30:01.000000000 -0800 @@ -44,10 +44,10 @@ asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset); void signal_fault(struct pt_regs *regs, void *frame, char *where); -static int ia32_copy_siginfo_to_user(siginfo_t32 *to, siginfo_t *from) +int ia32_copy_siginfo_to_user(siginfo_t32 __user *to, siginfo_t *from) { int err; - if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) + if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t32))) return -EFAULT; /* If you change siginfo_t structure, please make sure that @@ -55,21 +55,17 @@ static int ia32_copy_siginfo_to_user(sig It should never copy any pad contained in the structure to avoid security leaks, but must copy the generic 3 ints plus the relevant union member. */ - - if (from->si_code < 0) { err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); err |= __put_user(from->si_code, &to->si_code); - err |= __put_user(from->_sifields._rt._pid, &to->_sifields._rt._pid); - err |= __put_user(from->_sifields._rt._uid, &to->_sifields._rt._uid); - err |= __put_user((u32)(u64)from->_sifields._rt._sigval.sival_ptr, - &to->_sifields._rt._sigval.sival_ptr); - } else { - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user(from->si_code, &to->si_code); - /* First 32bits of unions are always present. */ + + if (from->si_code < 0) { err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user((u32)(u64)from->si_ptr, &to->si_ptr); + } else { + /* First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) */ switch (from->si_code >> 16) { case __SI_FAULT >> 16: break; @@ -78,18 +74,39 @@ static int ia32_copy_siginfo_to_user(sig err |= __put_user(from->si_stime, &to->si_stime); err |= __put_user(from->si_status, &to->si_status); default: + case __SI_KILL >> 16: err |= __put_user(from->si_uid, &to->si_uid); break; case __SI_POLL >> 16: - err |= __put_user(from->si_band, &to->si_band); err |= __put_user(from->si_fd, &to->si_fd); break; + case __SI_TIMER >> 16: + err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user((u32)(u64)from->si_ptr, &to->si_ptr); + break; /* case __SI_RT: This is not generated by the kernel as of now. */ } } return err; } +int ia32_copy_siginfo_from_user(siginfo_t *to, siginfo_t32 __user *from) +{ + int err; + if (!access_ok (VERIFY_READ, from, sizeof(siginfo_t32))) + return -EFAULT; + + err = __get_user(to->si_signo, &from->si_signo); + err |= __get_user(to->si_errno, &from->si_errno); + err |= __get_user(to->si_code, &from->si_code); + + err |= __get_user(to->si_pid, &from->si_pid); + err |= __get_user(to->si_uid, &from->si_uid); + err |= __get_user((u32)(u64)to->si_ptr, &from->si_ptr); + + return err; +} + asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask, struct pt_regs regs) { --- linux-2.6.2-rc1/arch/x86_64/ia32/sys_ia32.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/ia32/sys_ia32.c 2004-01-21 23:30:33.000000000 -0800 @@ -274,13 +274,16 @@ sys32_rt_sigaction(int sig, struct sigac return -EINVAL; if (act) { + compat_uptr_t handler, restorer; + if (verify_area(VERIFY_READ, act, sizeof(*act)) || - __get_user((long)new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(handler, &act->sa_handler) || __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user((long)new_ka.sa.sa_restorer, &act->sa_restorer)|| + __get_user(restorer, &act->sa_restorer)|| __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t))) return -EFAULT; - + new_ka.sa.sa_handler = compat_ptr(handler); + new_ka.sa.sa_restorer = compat_ptr(restorer); /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */ switch (_NSIG_WORDS) { case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] @@ -331,13 +334,18 @@ sys32_sigaction (int sig, struct old_sig if (act) { compat_old_sigset_t mask; + compat_uptr_t handler, restorer; if (verify_area(VERIFY_READ, act, sizeof(*act)) || - __get_user((long)new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(handler, &act->sa_handler) || __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user((long)new_ka.sa.sa_restorer, &act->sa_restorer) || + __get_user(restorer, &act->sa_restorer) || __get_user(mask, &act->sa_mask)) return -EFAULT; + + new_ka.sa.sa_handler = compat_ptr(handler); + new_ka.sa.sa_restorer = compat_ptr(restorer); + siginitset(&new_ka.sa.sa_mask, mask); } @@ -525,7 +533,7 @@ filldir32 (void *__buf, const char *name put_user(reclen, &dirent->d_reclen); copy_to_user(dirent->d_name, name, namlen); put_user(0, dirent->d_name + namlen); - ((char *) dirent) += reclen; + dirent = ((void *)dirent) + reclen; buf->current_dir = dirent; buf->count -= reclen; return 0; @@ -1022,84 +1030,6 @@ sys32_rt_sigpending(compat_sigset_t *set return ret; } -siginfo_t32 * -siginfo64to32(siginfo_t32 *d, siginfo_t *s) -{ - memset (d, 0, sizeof(siginfo_t32)); - d->si_signo = s->si_signo; - d->si_errno = s->si_errno; - d->si_code = s->si_code; - if (s->si_signo >= SIGRTMIN) { - d->si_pid = s->si_pid; - d->si_uid = s->si_uid; - memcpy(&d->si_int, &s->si_int, - sizeof(siginfo_t) - offsetof(siginfo_t,si_int)); - } else switch (s->si_signo) { - /* XXX: What about POSIX1.b timers */ - case SIGCHLD: - d->si_pid = s->si_pid; - d->si_status = s->si_status; - d->si_utime = s->si_utime; - d->si_stime = s->si_stime; - break; - case SIGSEGV: - case SIGBUS: - case SIGFPE: - case SIGILL: - d->si_addr = (long)(s->si_addr); -// d->si_trapno = s->si_trapno; - break; - case SIGPOLL: - d->si_band = s->si_band; - d->si_fd = s->si_fd; - break; - default: - d->si_pid = s->si_pid; - d->si_uid = s->si_uid; - break; - } - return d; -} - -siginfo_t * -siginfo32to64(siginfo_t *d, siginfo_t32 *s) -{ - d->si_signo = s->si_signo; - d->si_errno = s->si_errno; - d->si_code = s->si_code; - if (s->si_signo >= SIGRTMIN) { - d->si_pid = s->si_pid; - d->si_uid = s->si_uid; - memcpy(&d->si_int, - &s->si_int, - sizeof(siginfo_t) - offsetof(siginfo_t, si_int)); - } else switch (s->si_signo) { - /* XXX: What about POSIX1.b timers */ - case SIGCHLD: - d->si_pid = s->si_pid; - d->si_status = s->si_status; - d->si_utime = s->si_utime; - d->si_stime = s->si_stime; - break; - case SIGSEGV: - case SIGBUS: - case SIGFPE: - case SIGILL: - d->si_addr = (void *)A(s->si_addr); -// d->si_trapno = s->si_trapno; - break; - case SIGPOLL: - d->si_band = s->si_band; - d->si_fd = s->si_fd; - break; - default: - d->si_pid = s->si_pid; - d->si_uid = s->si_uid; - break; - } - return d; -} - extern asmlinkage long sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, const struct timespec *uts, size_t sigsetsize); @@ -1114,7 +1044,6 @@ sys32_rt_sigtimedwait(compat_sigset_t *u int ret; mm_segment_t old_fs = get_fs(); siginfo_t info; - siginfo_t32 info32; if (copy_from_user (&s32, uthese, sizeof(compat_sigset_t))) return -EFAULT; @@ -1126,13 +1055,18 @@ sys32_rt_sigtimedwait(compat_sigset_t *u } if (uts && get_compat_timespec(&t, uts)) return -EFAULT; + if (uinfo) { + /* stop data leak to user space in case of structure fill mismatch + * between sys_rt_sigtimedwait & ia32_copy_siginfo_to_user. + */ + memset(&info, 0, sizeof(info)); + } set_fs (KERNEL_DS); ret = sys_rt_sigtimedwait(&s, uinfo ? &info : NULL, uts ? &t : NULL, sigsetsize); set_fs (old_fs); if (ret >= 0 && uinfo) { - if (copy_to_user (uinfo, siginfo64to32(&info32, &info), - sizeof(siginfo_t32))) + if (ia32_copy_siginfo_to_user(uinfo, &info)) return -EFAULT; } return ret; @@ -1145,14 +1079,11 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo) { siginfo_t info; - siginfo_t32 info32; int ret; mm_segment_t old_fs = get_fs(); - if (copy_from_user (&info32, uinfo, sizeof(siginfo_t32))) + if (ia32_copy_siginfo_from_user(&info, uinfo)) return -EFAULT; - /* XXX: Is this correct? */ - siginfo32to64(&info, &info32); set_fs (KERNEL_DS); ret = sys_rt_sigqueueinfo(pid, sig, &info); set_fs (old_fs); --- linux-2.6.2-rc1/arch/x86_64/Kconfig 2004-01-20 21:51:18.000000000 -0800 +++ 25/arch/x86_64/Kconfig 2004-01-21 23:30:43.000000000 -0800 @@ -68,6 +68,9 @@ config HPET_TIMER If unsure, say Y. +config HPET_EMULATE_RTC + def_bool HPET_TIMER && RTC=y + config GENERIC_ISA_DMA bool default y @@ -108,10 +111,6 @@ config X86_TSC bool default y -config X86_GOOD_APIC - bool - default y - config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" help @@ -430,6 +429,8 @@ source "drivers/input/Kconfig" source "drivers/char/Kconfig" +source "drivers/i2c/Kconfig" + source "drivers/misc/Kconfig" source "drivers/media/Kconfig" @@ -532,9 +533,186 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -#config X86_REMOTE_DEBUG -# bool "kgdb debugging stub" +config KGDB + bool "Include kgdb kernel debugger" + depends on DEBUG_KERNEL + help + If you say Y here, the system will be compiled with the debug + option (-g) and a debugging stub will be included in the + kernel. This stub communicates with gdb on another (host) + computer via a serial port. The host computer should have + access to the kernel binary file (vmlinux) and a serial port + that is connected to the target machine. Gdb can be made to + configure the serial port or you can use stty and setserial to + do this. See the 'target' command in gdb. This option also + configures in the ability to request a breakpoint early in the + boot process. To request the breakpoint just include 'kgdb' + as a boot option when booting the target machine. The system + will then break as soon as it looks at the boot options. This + option also installs a breakpoint in panic and sends any + kernel faults to the debugger. For more information see the + Documentation/i386/kgdb.txt file. + +choice + depends on KGDB + prompt "Debug serial port BAUD" + default KGDB_115200BAUD + help + Gdb and the kernel stub need to agree on the baud rate to be + used. Some systems (x86 family at this writing) allow this to + be configured. + +config KGDB_9600BAUD + bool "9600" + +config KGDB_19200BAUD + bool "19200" + +config KGDB_38400BAUD + bool "38400" + +config KGDB_57600BAUD + bool "57600" + +config KGDB_115200BAUD + bool "115200" +endchoice + +config KGDB_PORT + hex "hex I/O port address of the debug serial port" + depends on KGDB + default 3f8 + help + Some systems (x86 family at this writing) allow the port + address to be configured. The number entered is assumed to be + hex, don't put 0x in front of it. The standard address are: + COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx + will tell you what you have. It is good to test the serial + connection with a live system before trying to debug. + +config KGDB_IRQ + int "IRQ of the debug serial port" + depends on KGDB + default 4 + help + This is the irq for the debug port. If everything is working + correctly and the kernel has interrupts on a control C to the + port should cause a break into the kernel debug stub. + +config DEBUG_INFO + bool + depends on KGDB + default y + +config KGDB_MORE + bool "Add any additional compile options" + depends on KGDB + default n + help + Saying yes here turns on the ability to enter additional + compile options. + + +config KGDB_OPTIONS + depends on KGDB_MORE + string "Additional compile arguments" + default "-O1" + help + This option allows you enter additional compile options for + the whole kernel compile. Each platform will have a default + that seems right for it. For example on PPC "-ggdb -O1", and + for i386 "-O1". Note that by configuring KGDB "-g" is already + turned on. In addition, on i386 platforms + "-fomit-frame-pointer" is deleted from the standard compile + options. + +config NO_KGDB_CPUS + int "Number of CPUs" + depends on KGDB && SMP + default NR_CPUS + help + + This option sets the number of cpus for kgdb ONLY. It is used + to prune some internal structures so they look "nice" when + displayed with gdb. This is to overcome possibly larger + numbers that may have been entered above. Enter the real + number to get nice clean kgdb_info displays. + +config KGDB_TS + bool "Enable kgdb time stamp macros?" + depends on KGDB + default n + help + Kgdb event macros allow you to instrument your code with calls + to the kgdb event recording function. The event log may be + examined with gdb at a break point. Turning on this + capability also allows you to choose how many events to + keep. Kgdb always keeps the lastest events. + +choice + depends on KGDB_TS + prompt "Max number of time stamps to save?" + default KGDB_TS_128 + +config KGDB_TS_64 + bool "64" + +config KGDB_TS_128 + bool "128" + +config KGDB_TS_256 + bool "256" + +config KGDB_TS_512 + bool "512" + +config KGDB_TS_1024 + bool "1024" + +endchoice + +config STACK_OVERFLOW_TEST + bool "Turn on kernel stack overflow testing?" + depends on KGDB + default n + help + This option enables code in the front line interrupt handlers + to check for kernel stack overflow on interrupts and system + calls. This is part of the kgdb code on x86 systems. + +config KGDB_CONSOLE + bool "Enable serial console thru kgdb port" + depends on KGDB + default n + help + This option enables the command line "console=kgdb" option. + When the system is booted with this option in the command line + all kernel printk output is sent to gdb (as well as to other + consoles). For this to work gdb must be connected. For this + reason, this command line option will generate a breakpoint if + gdb has not yet connected. After the gdb continue command is + given all pent up console output will be printed by gdb on the + host machine. Neither this option, nor KGDB require the + serial driver to be configured. + +config KGDB_SYSRQ + bool "Turn on SysRq 'G' command to do a break?" + depends on KGDB + default y + help + This option includes an option in the SysRq code that allows + you to enter SysRq G which generates a breakpoint to the KGDB + stub. This will work if the keyboard is alive and can + interrupt the system. Because of constraints on when the + serial port interrupt can be enabled, this code may allow you + to interrupt the system before the serial port control C is + available. Just say yes here. + +config MAGIC_SYSRQ + bool + depends on KGDB_SYSRQ + default y endmenu source "security/Kconfig" --- linux-2.6.2-rc1/arch/x86_64/kernel/acpi/boot.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/x86_64/kernel/acpi/boot.c 2004-01-21 23:27:03.000000000 -0800 @@ -46,11 +46,13 @@ #include #include -int acpi_lapic = 0; -int acpi_ioapic = 0; - #define PREFIX "ACPI: " +int acpi_noirq __initdata = 0; /* skip ACPI IRQ initialization */ +int acpi_ht __initdata = 1; /* enable HT */ + +int acpi_lapic = 0; +int acpi_ioapic = 0; /* -------------------------------------------------------------------------- Boot-time Configuration @@ -253,29 +255,66 @@ acpi_parse_hpet ( #ifdef CONFIG_ACPI_BUS /* - * Set specified PIC IRQ to level triggered mode. + * "acpi_pic_sci=level" (current default) + * programs the PIC-mode SCI to Level Trigger. + * (NO-OP if the BIOS set Level Trigger already) + * + * If a PIC-mode SCI is not recogznied or gives spurious IRQ7's + * it may require Edge Trigger -- use "acpi_pic_sci=edge" + * (NO-OP if the BIOS set Edge Trigger already) * * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) - * - * As the BIOS should have done this for us, - * print a warning if the IRQ wasn't already set to level. */ -void acpi_pic_set_level_irq(unsigned int irq) +static int __initdata acpi_pic_sci_trigger; /* 0: level, 1: edge */ + +void __init +acpi_pic_sci_set_trigger(unsigned int irq) { unsigned char mask = 1 << (irq & 7); unsigned int port = 0x4d0 + (irq >> 3); unsigned char val = inb(port); + + printk(PREFIX "IRQ%d SCI:", irq); if (!(val & mask)) { - printk(KERN_WARNING PREFIX "IRQ %d was Edge Triggered, " - "setting to Level Triggerd\n", irq); - outb(val | mask, port); + printk(" Edge"); + + if (!acpi_pic_sci_trigger) { + printk(" set to Level"); + outb(val | mask, port); + } + } else { + printk(" Level"); + + if (acpi_pic_sci_trigger) { + printk(" set to Edge"); + outb(val | mask, port); + } } + printk(" Trigger.\n"); } + +int __init +acpi_pic_sci_setup(char *str) +{ + while (str && *str) { + if (strncmp(str, "level", 5) == 0) + acpi_pic_sci_trigger = 0; /* force level trigger */ + if (strncmp(str, "edge", 4) == 0) + acpi_pic_sci_trigger = 1; /* force edge trigger */ + str = strchr(str, ','); + if (str) + str += strspn(str, ", \t"); + } + return 1; +} + +__setup("acpi_pic_sci=", acpi_pic_sci_setup); + #endif /* CONFIG_ACPI_BUS */ static unsigned long __init @@ -354,8 +393,10 @@ acpi_boot_init (void) * Initialize the ACPI boot-time table parser. */ result = acpi_table_init(); - if (result) + if (result) { + acpi_disabled = 1; return result; + } result = acpi_blacklisted(); if (result) { @@ -442,7 +483,7 @@ acpi_boot_init (void) * If MPS is present, it will handle them, * otherwise the system will stay in PIC mode */ - if (acpi_disabled) { + if (acpi_disabled || acpi_noirq) { return 1; } @@ -484,6 +525,8 @@ acpi_boot_init (void) acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; + acpi_irq_balance_set(NULL); + acpi_ioapic = 1; #endif /*CONFIG_X86_IO_APIC*/ --- linux-2.6.2-rc1/arch/x86_64/kernel/acpi/sleep.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/arch/x86_64/kernel/acpi/sleep.c 2004-01-21 23:30:01.000000000 -0800 @@ -56,6 +56,7 @@ /* address in low memory of the wakeup routine. */ unsigned long acpi_wakeup_address = 0; +unsigned long acpi_video_flags; extern char wakeup_start, wakeup_end; extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); @@ -116,6 +117,22 @@ void __init acpi_reserve_bootmem(void) printk(KERN_DEBUG "ACPI: have wakeup address 0x%8.8lx\n", acpi_wakeup_address); } +static int __init acpi_sleep_setup(char *str) +{ + while ((str != NULL) && (*str != '\0')) { + if (strncmp(str, "s3_bios", 7) == 0) + acpi_video_flags = 1; + if (strncmp(str, "s3_mode", 7) == 0) + acpi_video_flags |= 2; + str = strchr(str, ','); + if (str != NULL) + str += strspn(str, ", \t"); + } + return 1; +} + +__setup("acpi_sleep=", acpi_sleep_setup); + #endif /*CONFIG_ACPI_SLEEP*/ void acpi_pci_link_exit(void) {} --- linux-2.6.2-rc1/arch/x86_64/kernel/acpi/wakeup.S 2003-07-02 14:53:14.000000000 -0700 +++ 25/arch/x86_64/kernel/acpi/wakeup.S 2004-01-21 23:30:01.000000000 -0800 @@ -41,7 +41,19 @@ wakeup_code: cmpl $0x12345678, %eax jne bogus_real_magic + testl $1, video_flags - wakeup_code + jz 1f lcall $0xc000,$3 + movw %cs, %ax + movw %ax, %ds # Bios might have played with that + movw %ax, %ss +1: + + testl $2, video_flags - wakeup_code + jz 1f + mov video_mode - wakeup_code, %ax + call mode_seta +1: movw $0xb800, %ax movw %ax,%fs @@ -250,6 +262,7 @@ real_save_gdt: .word 0 .quad 0 real_magic: .quad 0 video_mode: .quad 0 +video_flags: .quad 0 bogus_real_magic: movb $0xba,%al ; outb %al,$0x80 @@ -382,8 +395,10 @@ ENTRY(acpi_copy_wakeup_routine) movl %eax, saved_efer movl %edx, saved_efer2 -# movq saved_videomode, %rdx # FIXME: videomode - movq %rdx, video_mode - wakeup_start (,%rdi) + movl saved_video_mode, %edx + movl %edx, video_mode - wakeup_start (,%rdi) + movl acpi_video_flags, %edx + movl %edx, video_flags - wakeup_start (,%rdi) movq $0x12345678, real_magic - wakeup_start (,%rdi) movq $0x123456789abcdef0, %rdx movq %rdx, saved_magic --- linux-2.6.2-rc1/arch/x86_64/kernel/aperture.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/x86_64/kernel/aperture.c 2004-01-21 23:30:33.000000000 -0800 @@ -87,13 +87,15 @@ static int __init aperture_valid(char *n /* Find a PCI capability */ static __u32 __init find_cap(int num, int slot, int func, int cap) { + u8 pos; + int bytes; if (!(read_pci_config_16(num,slot,func,PCI_STATUS) & PCI_STATUS_CAP_LIST)) return 0; - u8 pos = read_pci_config_byte(num,slot,func,PCI_CAPABILITY_LIST); - int bytes; + pos = read_pci_config_byte(num,slot,func,PCI_CAPABILITY_LIST); for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { + u8 id; pos &= ~3; - u8 id = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_ID); + id = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_ID); if (id == 0xff) break; if (id == cap) @@ -106,26 +108,31 @@ static __u32 __init find_cap(int num, in /* Read a standard AGPv3 bridge header */ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) { - printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func); - u32 apsizereg = read_pci_config_16(num,slot,func, cap + 0x14); + u32 apsize; + u32 apsizereg; + int nbits; + u32 aper_low, aper_hi; + u64 aper; + printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func); + apsizereg = read_pci_config_16(num,slot,func, cap + 0x14); if (apsizereg == 0xffffffff) { printk("APSIZE in AGP bridge unreadable\n"); return 0; } - u32 apsize = apsizereg & 0xfff; + apsize = apsizereg & 0xfff; /* Some BIOS use weird encodings not in the AGPv3 table. */ if (apsize & 0xff) apsize |= 0xf00; - int nbits = hweight16(apsize); + nbits = hweight16(apsize); *order = 7 - nbits; if ((int)*order < 0) /* < 32MB */ *order = 0; - u32 aper_low = read_pci_config(num,slot,func, 0x10); - u32 aper_hi = read_pci_config(num,slot,func,0x14); - u64 aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); + aper_low = read_pci_config(num,slot,func, 0x10); + aper_hi = read_pci_config(num,slot,func,0x14); + aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", aper, 32 << *order, apsizereg); @@ -155,6 +162,7 @@ static __u32 __init search_agp_bridge(u3 for (slot = 0; slot < 32; slot++) { for (func = 0; func < 8; func++) { u32 class, cap; + u8 type; class = read_pci_config(num,slot,func, PCI_CLASS_REVISION); if (class == 0xffffffff) @@ -172,7 +180,7 @@ static __u32 __init search_agp_bridge(u3 } /* No multi-function device? */ - u8 type = read_pci_config_byte(num,slot,func, + type = read_pci_config_byte(num,slot,func, PCI_HEADER_TYPE); if (!(type & 0x80)) break; --- linux-2.6.2-rc1/arch/x86_64/kernel/entry.S 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/x86_64/kernel/entry.S 2004-01-21 23:26:59.000000000 -0800 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +42,6 @@ #include #include #include -#include .code64 @@ -82,22 +82,53 @@ /* push in order ss, rsp, eflags, cs, rip */ xorq %rax, %rax pushq %rax /* ss */ + CFI_ADJUST_CFA_OFFSET 8 pushq %rax /* rsp */ + CFI_ADJUST_CFA_OFFSET 8 + CFI_OFFSET rip,0 pushq $(1<<9) /* eflags - interrupts on */ + CFI_ADJUST_CFA_OFFSET 8 pushq $__KERNEL_CS /* cs */ + CFI_ADJUST_CFA_OFFSET 8 pushq \child_rip /* rip */ + CFI_ADJUST_CFA_OFFSET 8 + CFI_OFFSET rip,0 pushq %rax /* orig rax */ + CFI_ADJUST_CFA_OFFSET 8 .endm .macro UNFAKE_STACK_FRAME addq $8*6, %rsp + CFI_ADJUST_CFA_OFFSET -(6*8) .endm + .macro CFI_DEFAULT_STACK + CFI_ADJUST_CFA_OFFSET (SS) + CFI_OFFSET r15,R15-SS + CFI_OFFSET r14,R14-SS + CFI_OFFSET r13,R13-SS + CFI_OFFSET r12,R12-SS + CFI_OFFSET rbp,RBP-SS + CFI_OFFSET rbx,RBX-SS + CFI_OFFSET r11,R11-SS + CFI_OFFSET r10,R10-SS + CFI_OFFSET r9,R9-SS + CFI_OFFSET r8,R8-SS + CFI_OFFSET rax,RAX-SS + CFI_OFFSET rcx,RCX-SS + CFI_OFFSET rdx,RDX-SS + CFI_OFFSET rsi,RSI-SS + CFI_OFFSET rdi,RDI-SS + CFI_OFFSET rsp,RSP-SS + CFI_OFFSET rip,RIP-SS + .endm /* * A newly forked process directly context switches into this. */ /* rdi: prev */ ENTRY(ret_from_fork) + CFI_STARTPROC + CFI_DEFAULT_STACK call schedule_tail GET_THREAD_INFO(%rcx) bt $TIF_SYSCALL_TRACE,threadinfo_flags(%rcx) @@ -115,6 +146,7 @@ rff_trace: call syscall_trace GET_THREAD_INFO(%rcx) jmp rff_action + CFI_ENDPROC /* * System call entry. Upto 6 arguments in registers are supported. @@ -144,6 +176,7 @@ rff_trace: */ ENTRY(system_call) + CFI_STARTPROC swapgs movq %rsp,%gs:pda_oldrsp movq %gs:pda_kernelstack,%rsp @@ -283,6 +316,7 @@ int_signal: int_restore_rest: RESTORE_REST jmp int_with_check + CFI_ENDPROC /* * Certain special system calls that need to save a complete full stack frame. @@ -303,7 +337,9 @@ int_restore_rest: PTREGSCALL stub_iopl, sys_iopl ENTRY(ptregscall_common) + CFI_STARTPROC popq %r11 + CFI_ADJUST_CFA_OFFSET -8 SAVE_REST movq %r11, %r15 FIXUP_TOP_OF_STACK %r11 @@ -312,10 +348,14 @@ ENTRY(ptregscall_common) movq %r15, %r11 RESTORE_REST pushq %r11 + CFI_ADJUST_CFA_OFFSET 8 ret + CFI_ENDPROC ENTRY(stub_execve) + CFI_STARTPROC popq %r11 + CFI_ADJUST_CFA_OFFSET -8 SAVE_REST movq %r11, %r15 FIXUP_TOP_OF_STACK %r11 @@ -330,15 +370,18 @@ ENTRY(stub_execve) ret exec_32bit: + CFI_ADJUST_CFA_OFFSET REST_SKIP movq %rax,RAX(%rsp) RESTORE_REST jmp int_ret_from_sys_call + CFI_ENDPROC /* * sigreturn is special because it needs to restore all registers on return. * This cannot be done with SYSRET, so use the IRET return path instead. */ ENTRY(stub_rt_sigreturn) + CFI_STARTPROC addq $8, %rsp SAVE_REST FIXUP_TOP_OF_STACK %r11 @@ -346,6 +389,7 @@ ENTRY(stub_rt_sigreturn) movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer RESTORE_REST jmp int_ret_from_sys_call + CFI_ENDPROC /* * Interrupt entry/exit. @@ -357,10 +401,20 @@ ENTRY(stub_rt_sigreturn) /* 0(%rsp): interrupt number */ .macro interrupt func + CFI_STARTPROC simple + CFI_DEF_CFA rsp,(SS-ORIG_RAX) + CFI_OFFSET rsp,(RSP-SS) + CFI_OFFSET rip,(RIP-SS) cld -#ifdef CONFIG_X86_REMOTE_DEBUG +#ifdef CONFIG_KGDB SAVE_ALL movq %rsp,%rdi + /* + * Setup a stack frame pointer. This allows gdb to trace + * back to the original stack. + */ + movq %rsp,%rbp + CFI_DEF_CFA_REGISTER rbp #else SAVE_ARGS leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler @@ -382,6 +436,9 @@ ret_from_intr: popq %rdi cli subl $1,%gs:pda_irqcount +#ifdef CONFIG_KGDB + movq RBP(%rdi),%rbp +#endif leaq ARGOFFSET(%rdi),%rsp exit_intr: GET_THREAD_INFO(%rcx) @@ -465,6 +522,7 @@ retint_kernel: movl $0,threadinfo_preempt_count(%rcx) jmp exit_intr #endif + CFI_ENDPROC /* * APIC interrupts. @@ -473,6 +531,7 @@ retint_kernel: pushq $\num-256 interrupt \func jmp ret_from_intr + CFI_ENDPROC .endm #ifdef CONFIG_SMP @@ -518,24 +577,43 @@ ENTRY(spurious_interrupt) * and the exception handler in %rax. */ ENTRY(error_entry) + CFI_STARTPROC simple + CFI_DEF_CFA rsp,(SS-RDI) + CFI_REL_OFFSET rsp,(RSP-RDI) + CFI_REL_OFFSET rip,(RIP-RDI) /* rdi slot contains rax, oldrax contains error code */ cld subq $14*8,%rsp + CFI_ADJUST_CFA_OFFSET (14*8) movq %rsi,13*8(%rsp) + CFI_REL_OFFSET rsi,RSI movq 14*8(%rsp),%rsi /* load rax from rdi slot */ movq %rdx,12*8(%rsp) + CFI_REL_OFFSET rdx,RDX movq %rcx,11*8(%rsp) + CFI_REL_OFFSET rcx,RCX movq %rsi,10*8(%rsp) /* store rax */ + CFI_REL_OFFSET rax,RAX movq %r8, 9*8(%rsp) + CFI_REL_OFFSET r8,R8 movq %r9, 8*8(%rsp) + CFI_REL_OFFSET r9,R9 movq %r10,7*8(%rsp) + CFI_REL_OFFSET r10,R10 movq %r11,6*8(%rsp) + CFI_REL_OFFSET r11,R11 movq %rbx,5*8(%rsp) + CFI_REL_OFFSET rbx,RBX movq %rbp,4*8(%rsp) + CFI_REL_OFFSET rbp,RBP movq %r12,3*8(%rsp) + CFI_REL_OFFSET r12,R12 movq %r13,2*8(%rsp) + CFI_REL_OFFSET r13,R13 movq %r14,1*8(%rsp) + CFI_REL_OFFSET r14,R14 movq %r15,(%rsp) + CFI_REL_OFFSET r15,R15 xorl %ebx,%ebx testl $3,CS(%rsp) je error_kernelspace @@ -561,6 +639,7 @@ error_exit: swapgs RESTORE_ARGS 0,8,0 iretq + CFI_ENDPROC error_kernelspace: incl %ebx @@ -615,6 +694,7 @@ bad_gs: * rdi: fn, rsi: arg, rdx: flags */ ENTRY(kernel_thread) + CFI_STARTPROC FAKE_STACK_FRAME $child_rip SAVE_ALL @@ -642,6 +722,8 @@ ENTRY(kernel_thread) RESTORE_ALL UNFAKE_STACK_FRAME ret + CFI_ENDPROC + child_rip: /* @@ -671,6 +753,7 @@ child_rip: * rdi: name, rsi: argv, rdx: envp, fake frame on the stack */ ENTRY(execve) + CFI_STARTPROC FAKE_STACK_FRAME $0 SAVE_ALL call sys_execve @@ -681,6 +764,7 @@ ENTRY(execve) RESTORE_ARGS UNFAKE_STACK_FRAME ret + CFI_ENDPROC ENTRY(page_fault) errorentry do_page_fault @@ -692,6 +776,7 @@ ENTRY(simd_coprocessor_error) zeroentry do_simd_coprocessor_error ENTRY(device_not_available) + CFI_STARTPROC pushq $-1 #error code SAVE_ALL movl $1,%ebx @@ -706,11 +791,13 @@ ENTRY(device_not_available) cmoveq %rcx,%rdx call *%rdx jmp error_exit + CFI_ENDPROC ENTRY(debug) zeroentry do_debug ENTRY(nmi) + CFI_STARTPROC pushq $-1 SAVE_ALL /* NMI could happen inside the critical section of a swapgs, @@ -731,6 +818,7 @@ ENTRY(nmi) swapgs 2: RESTORE_ALL 8 iretq + CFI_ENDPROC ENTRY(int3) zeroentry do_int3 --- linux-2.6.2-rc1/arch/x86_64/kernel/io_apic.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/io_apic.c 2004-01-21 23:27:03.000000000 -0800 @@ -1121,8 +1121,6 @@ static void __init setup_ioapic_ids_from unsigned char old_id; unsigned long flags; - if (acpi_ioapic) return; /* ACPI does that already */ - /* * Set the IOAPIC ID to the value stored in the MPC table. */ @@ -1751,12 +1749,14 @@ void __init setup_IO_APIC(void) /* * Set up the IO-APIC IRQ routing table. */ - setup_ioapic_ids_from_mpc(); + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - print_IO_APIC(); + if (!acpi_ioapic) + print_IO_APIC(); } /* Ensure the ACPI SCI interrupt level is active low, edge-triggered */ --- linux-2.6.2-rc1/arch/x86_64/kernel/irq.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/irq.c 2004-01-21 23:27:00.000000000 -0800 @@ -405,6 +405,11 @@ out: spin_unlock(&desc->lock); irq_exit(); + +#ifdef CONFIG_KGDB_IRQ + kgdb_process_breakpoint(); +#endif + return 1; } --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/x86_64/kernel/kgdb_stub.c 2004-01-21 23:27:00.000000000 -0800 @@ -0,0 +1,2594 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (c) 2000 VERITAS Software Corporation. + * + */ +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * Updated by: David Grothe + * Updated by: Robert Walsh + * Updated by: wangdi + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Compatibility with 2.1.xx kernel by David Grothe + * + * Changes to allow auto initilization. All that is needed is that it + * be linked with the kernel and a break point (int 3) be executed. + * The header file defines BREAKPOINT to allow one to do + * this. It should also be possible, once the interrupt system is up, to + * call putDebugChar("+"). Once this is done, the remote debugger should + * get our attention by sending a ^C in a packet. George Anzinger + * + * Integrated into 2.2.5 kernel by Tigran Aivazian + * Added thread support, support for multiple processors, + * support for ia-32(x86) hardware debugging. + * Amit S. Kale ( akale@veritas.com ) + * + * Modified to support debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + * X86_64 changes from Andi Kleen's patch merged by Jim Houston + * (jim.houston@ccur.com). If it works thank Andi if its broken + * blame me. + * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing an int 3. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? SNN (signal NN) + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $#. + * + * where + * :: + * :: < two hex digits computed as modulo 256 sum of > + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer. + * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ +#define KGDB_VERSION "<20030915.1651.33>" +#include +#include +#include /* for strcpy */ +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define Dearly_printk(x...) +int kgdb_enabled = 0; + +/************************************************************************ + * + * external low-level support routines + */ +typedef void (*Function) (void); /* pointer to a function */ + +/* Thread reference */ +typedef unsigned char threadref[8]; + +extern int tty_putDebugChar(int); /* write a single character */ +extern int tty_getDebugChar(void); /* read and return a single char */ +extern void tty_flushDebugChar(void); /* flush pending characters */ +extern int eth_putDebugChar(int); /* write a single character */ +extern int eth_getDebugChar(void); /* read and return a single char */ +extern void eth_flushDebugChar(void); /* flush pending characters */ + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 400 + +char *kgdb_version = KGDB_VERSION; + +/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ +int debug_regs = 0; /* set to non-zero to print registers */ + +/* filled in by an external module */ +char *gdb_module_offsets; + +static const char hexchars[] = "0123456789abcdef"; + +/* Number of bytes of registers. */ +#define NUMREGBYTES (NUMREGS * sizeof(unsigned long)) +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + * + * Could add XMM and segment registers here. + */ +enum regnames {_RAX, + _RBX, + _RCX, + _RDX, + _RSI, + _RDI, + _RBP, + _RSP, + _R8, + _R9, + _R10, + _R11, + _R12, + _R13, + _R14, + _R15, + _PC, + _PS, + NUMREGS }; + + +/*************************** ASSEMBLY CODE MACROS *************************/ +/* + * Put the error code here just in case the user cares. + * Likewise, the vector number here (since GDB only gets the signal + * number through the usual means, and that's not very specific). + * The called_from is the return address so he can tell how we entered kgdb. + * This will allow him to seperate out the various possible entries. + */ +#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ + +#define PID_MAX PID_MAX_DEFAULT + +#ifdef CONFIG_SMP +void smp_send_nmi_allbutself(void); +#define IF_SMP(x) x +#undef MAX_NO_CPUS +#ifndef CONFIG_NO_KGDB_CPUS +#define CONFIG_NO_KGDB_CPUS 2 +#endif +#if CONFIG_NO_KGDB_CPUS > NR_CPUS +#define MAX_NO_CPUS NR_CPUS +#else +#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS +#endif +#define hold_init hold_on_sstep: 1, +#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) +#define NUM_CPUS num_online_cpus() +#else +#define IF_SMP(x) +#define hold_init +#undef MAX_NO_CPUS +#define MAX_NO_CPUS 1 +#define NUM_CPUS 1 +#endif +#define NOCPU (struct task_struct *)0xbad1fbad +/* *INDENT-OFF* */ +struct kgdb_info { + int used_malloc; + void *called_from; + long long entry_tsc; + int errcode; + int vector; + int print_debug_info; +#ifdef CONFIG_SMP + int hold_on_sstep; + struct { + volatile struct task_struct *task; + int pid; + int hold; + struct pt_regs *regs; + } cpus_waiting[MAX_NO_CPUS]; +#endif +} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; + +/* *INDENT-ON* */ + +#define used_m kgdb_info.used_malloc +/* + * This is little area we set aside to contain the stack we + * need to build to allow gdb to call functions. We use one + * per cpu to avoid locking issues. We will do all this work + * with interrupts off so that should take care of the protection + * issues. + */ +#define LOOKASIDE_SIZE 200 /* should be more than enough */ +#define MALLOC_MAX 200 /* Max malloc size */ +struct { + unsigned long rsp; + unsigned long array[LOOKASIDE_SIZE]; +} fn_call_lookaside[MAX_NO_CPUS]; + +static int trap_cpu; +static unsigned long OLD_esp; + +#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] +#define IF_BIT 0x200 +#define TF_BIT 0x100 + +#define MALLOC_ROUND 8-1 + +static char malloc_array[MALLOC_MAX]; +IF_SMP(static void to_gdb(const char *mess)); +void * +malloc(int size) +{ + + if (size <= (MALLOC_MAX - used_m)) { + int old_used = used_m; + used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); + return &malloc_array[old_used]; + } else { + return NULL; + } +} + +/* + * I/O dispatch functions... + * Based upon kgdboe, either call the ethernet + * handler or the serial one.. + */ +void +putDebugChar(int c) +{ + if (!kgdboe) { + tty_putDebugChar(c); + } else { + eth_putDebugChar(c); + } +} + +int +getDebugChar(void) +{ + if (!kgdboe) { + return tty_getDebugChar(); + } else { + return eth_getDebugChar(); + } +} + +void +flushDebugChar(void) +{ + if (!kgdboe) { + tty_flushDebugChar(); + } else { + eth_flushDebugChar(); + } +} + +/* + * Gdb calls functions by pushing agruments, including a return address + * on the stack and the adjusting EIP to point to the function. The + * whole assumption in GDB is that we are on a different stack than the + * one the "user" i.e. code that hit the break point, is on. This, of + * course is not true in the kernel. Thus various dodges are needed to + * do the call without directly messing with EIP (which we can not change + * as it is just a location and not a register. To adjust it would then + * require that we move every thing below EIP up or down as needed. This + * will not work as we may well have stack relative pointer on the stack + * (such as the pointer to regs, for example). + + * So here is what we do: + * We detect gdb attempting to store into the stack area and instead, store + * into the fn_call_lookaside.array at the same relative location as if it + * were the area ESP pointed at. We also trap ESP modifications + * and uses these to adjust fn_call_lookaside.esp. On entry + * fn_call_lookaside.esp will be set to point at the last entry in + * fn_call_lookaside.array. This allows us to check if it has changed, and + * if so, on exit, we add the registers we will use to do the move and a + * trap/ interrupt return exit sequence. We then adjust the eflags in the + * regs array (remember we now have a copy in the fn_call_lookaside.array) to + * kill the interrupt bit, AND we change EIP to point at our set up stub. + * As part of the register set up we preset the registers to point at the + * begining and end of the fn_call_lookaside.array, so all the stub needs to + * do is move words from the array to the stack until ESP= the desired value + * then do the rti. This will then transfer to the desired function with + * all the correct registers. Nifty huh? + */ +extern asmlinkage void fn_call_stub(void); +extern asmlinkage void fn_rtn_stub(void); +/* *INDENT-OFF* */ +__asm__("fn_rtn_stub:\n\t" + "movq %rax,%rsp\n\t" + "fn_call_stub:\n\t" + "1:\n\t" + "addq $-8,%rbx\n\t" + "movq (%rbx), %rax\n\t" + "pushq %rax\n\t" + "cmpq %rsp,%rcx\n\t" + "jne 1b\n\t" + "popq %rax\n\t" + "popq %rbx\n\t" + "popq %rcx\n\t" + "iret \n\t"); +/* *INDENT-ON* */ +#define gdb_i386vector kgdb_info.vector +#define gdb_i386errcode kgdb_info.errcode +#define waiting_cpus kgdb_info.cpus_waiting +#define remote_debug kgdb_info.print_debug_info +#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold +/* gdb locks */ + +#ifdef CONFIG_SMP +static int in_kgdb_called; +static spinlock_t waitlocks[MAX_NO_CPUS] = + {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; +/* + * The following array has the thread pointer of each of the "other" + * cpus. We make it global so it can be seen by gdb. + */ +volatile int in_kgdb_entry_log[MAX_NO_CPUS]; +volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; +/* +static spinlock_t continuelocks[MAX_NO_CPUS]; +*/ +spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; +/* waiters on our spinlock plus us */ +static atomic_t spinlock_waiters = ATOMIC_INIT(1); +static int spinlock_count = 0; +static int spinlock_cpu = 0; +/* + * Note we use nested spin locks to account for the case where a break + * point is encountered when calling a function by user direction from + * kgdb. Also there is the memory exception recursion to account for. + * Well, yes, but this lets other cpus thru too. Lets add a + * cpu id to the lock. + */ +#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ + spinlock_cpu != smp_processor_id()){\ + atomic_inc(&spinlock_waiters); \ + while (! spin_trylock(x)) {\ + in_kgdb(®s);\ + }\ + atomic_dec(&spinlock_waiters); \ + spinlock_count = 1; \ + spinlock_cpu = smp_processor_id(); \ + }else{ \ + spinlock_count++; \ + } +#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) +#else +unsigned kgdb_spinlock = 0; +#define KGDB_SPIN_LOCK(x) --*x +#define KGDB_SPIN_UNLOCK(x) ++*x +#endif + +int +hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return (ch - 'a' + 10); + if ((ch >= '0') && (ch <= '9')) + return (ch - '0'); + if ((ch >= 'A') && (ch <= 'F')) + return (ch - 'A' + 10); + return (-1); +} + +/* scan for the sequence $# */ +void +getpacket(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + char ch; + + do { + /* wait around for the start character, ignore all other characters */ + while ((ch = (getDebugChar() & 0x7f)) != '$') ; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < BUFMAX) { + ch = getDebugChar() & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(getDebugChar() & 0x7f) << 4; + xmitcsum += hex(getDebugChar() & 0x7f); + if ((remote_debug) && (checksum != xmitcsum)) { + printk + ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", + checksum, xmitcsum, buffer); + } + + if (checksum != xmitcsum) + putDebugChar('-'); /* failed checksum */ + else { + putDebugChar('+'); /* successful transfer */ + /* if a sequence char is present, reply the sequence ID */ + if (buffer[2] == ':') { + putDebugChar(buffer[0]); + putDebugChar(buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(buffer); + for (i = 3; i <= count; i++) + buffer[i - 3] = buffer[i]; + } + } + } + } while (checksum != xmitcsum); + + if (remote_debug) + printk("R:%s\n", buffer); + flushDebugChar(); +} + +/* send the packet in buffer. */ + +void +putpacket(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + /* $#. */ + + if (!kgdboe) { + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + putDebugChar(ch); + checksum += ch; + count += 1; + } + + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + + } while ((getDebugChar() & 0x7f) != '+'); + } else { + /* + * For udp, we can not transfer too much bytes once. + * We only transfer MAX_SEND_COUNT size bytes each time + */ + +#define MAX_SEND_COUNT 30 + + int send_count = 0, i = 0; + char send_buf[MAX_SEND_COUNT]; + + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + send_count = 0; + while ((ch = buffer[count])) { + if (send_count >= MAX_SEND_COUNT) { + for(i = 0; i < MAX_SEND_COUNT; i++) { + putDebugChar(send_buf[i]); + } + flushDebugChar(); + send_count = 0; + } else { + send_buf[send_count] = ch; + checksum += ch; + count ++; + send_count++; + } + } + for(i = 0; i < send_count; i++) + putDebugChar(send_buf[i]); + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + } while ((getDebugChar() & 0x7f) != '+'); + } +} + +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; +static char lbuf[BUFMAX]; +static short error; + +void +debug_error(char *format, char *parm) +{ + if (remote_debug) + printk(format, parm); +} + +static void +print_regs(struct pt_regs *regs) +{ + printk("RAX=%016lx RBX=%016lx RCX=%016lx\n", + regs->rax, regs->rbx, regs->rcx); + printk("RDX=%016lx RSI=%016lx RDI=%016lx\n", + regs->rdx, regs->rsi, regs->rdi); + printk("RBP=%016lx PS=%016lx PC=%016lx\n", + regs->rbp, regs->eflags, regs->rip); + printk("R8=%016lx R9=%016lx R10=%016lx\n", + regs->r8, regs->r9, regs->r10); + printk("R11=%016lx R12=%016lx R13=%016lx\n", + regs->r11, regs->r12, regs->r13); + printk("R14=%016lx R15=%016lx RSP=%016lx\n", + regs->r14, regs->r15, regs->rsp); +} + +#define NEW_esp fn_call_lookaside[trap_cpu].rsp + +static void +regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_RAX] = regs->rax; + gdb_regs[_RBX] = regs->rbx; + gdb_regs[_RCX] = regs->rcx; + gdb_regs[_RDX] = regs->rdx; + gdb_regs[_RSI] = regs->rsi; + gdb_regs[_RDI] = regs->rdi; + gdb_regs[_RBP] = regs->rbp; + gdb_regs[ _PS] = regs->eflags; + gdb_regs[ _PC] = regs->rip; + gdb_regs[ _R8] = regs->r8; + gdb_regs[ _R9] = regs->r9; + gdb_regs[_R10] = regs->r10; + gdb_regs[_R11] = regs->r11; + gdb_regs[_R12] = regs->r12; + gdb_regs[_R13] = regs->r13; + gdb_regs[_R14] = regs->r14; + gdb_regs[_R15] = regs->r15; + gdb_regs[_RSP] = regs->rsp; + + /* Note, as we are a debugging the kernel, we will always + * trap in kernel code, this means no priviledge change, + * and so the pt_regs structure is not completely valid. In a non + * privilege change trap, only EFLAGS, CS and EIP are put on the stack, + * SS and ESP are not stacked, this means that the last 2 elements of + * pt_regs is not valid (they would normally refer to the user stack) + * also, using regs+1 is no good because you end up will a value that is + * 2 longs (8) too high. This used to cause stepping over functions + * to fail, so my fix is to use the address of regs->esp, which + * should point at the end of the stack frame. Note I have ignored + * completely exceptions that cause an error code to be stacked, such + * as double fault. Stuart Hughes, Zentropix. + * original code: gdb_regs[_ESP] = (int) (regs + 1) ; + + * this is now done on entry and moved to OLD_esp (as well as NEW_esp). + */ +} + +static void +gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + regs->rax = gdb_regs[_RAX] ; + regs->rbx = gdb_regs[_RBX] ; + regs->rcx = gdb_regs[_RCX] ; + regs->rdx = gdb_regs[_RDX] ; + regs->rsi = gdb_regs[_RSI] ; + regs->rdi = gdb_regs[_RDI] ; + regs->rbp = gdb_regs[_RBP] ; + regs->eflags = gdb_regs[ _PS] ; + regs->rip = gdb_regs[ _PC] ; + regs->r8 = gdb_regs[ _R8] ; + regs->r9 = gdb_regs[ _R9] ; + regs->r10 = gdb_regs[ _R10] ; + regs->r11 = gdb_regs[ _R11] ; + regs->r12 = gdb_regs[ _R12] ; + regs->r13 = gdb_regs[ _R13] ; + regs->r14 = gdb_regs[ _R14] ; + regs->r15 = gdb_regs[ _R15] ; + #if 0 /* can't change these */ + regs->rsp = gdb_regs[_RSP] ; + regs->ss = gdb_regs[ _SS] ; + regs->fs = gdb_regs[_FS]; + regs->gs = gdb_regs[_GS]; +#endif +} /* gdb_regs_to_regs */ + +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched ((unsigned long) scheduling_functions_start_here) +#define last_sched ((unsigned long) scheduling_functions_end_here) + +int thread_list = 0; +extern void thread_return(void); + +void +get_gdb_regs(struct task_struct *p, struct pt_regs *regs, unsigned long *gdb_regs) +{ + unsigned long **rbp, *rsp, *rsp0, pc; + int count = 0; + IF_SMP(int i); + if (!p || p == current) { + regs_to_gdb_regs(gdb_regs, regs); + return; + } +#ifdef CONFIG_SMP + for (i = 0; i < MAX_NO_CPUS; i++) { + if (p == kgdb_info.cpus_waiting[i].task) { + regs_to_gdb_regs(gdb_regs, + kgdb_info.cpus_waiting[i].regs); + gdb_regs[_RSP] = + (unsigned long)&kgdb_info.cpus_waiting[i].regs->rsp; + + return; + } + } +#endif + memset(gdb_regs, 0, NUMREGBYTES); + rsp = (unsigned long *)p->thread.rsp; + rbp = (unsigned long **)rsp[0]; + rsp += 2; + gdb_regs[_PC] = (unsigned long)thread_return; + gdb_regs[_RBP] = (unsigned long)rbp; + gdb_regs[_RSP] = (unsigned long)rsp; + +/* + * This code is to give a more informative notion of where a process + * is waiting. It is used only when the user asks for a thread info + * list. If he then switches to the thread, s/he will find the task + * is in schedule, but a back trace should show the same info we come + * up with. This code was shamelessly purloined from process.c. It was + * then enhanced to provide more registers than simply the program + * counter. + */ + + if (!thread_list) { + return; + } + + if (p->state == TASK_RUNNING) + return; + rsp0 = (unsigned long *)p->thread.rsp0; + if (rsp < (unsigned long *) p->thread_info || rsp > rsp0) + return; + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + do { + if (*rbp < rsp || *rbp > rsp0) + break; + rbp = (unsigned long **)*rbp; + rsp = (unsigned long *)rbp; + pc = rsp[1]; + + if (pc < first_sched || pc >= last_sched) + break; + gdb_regs[_PC] = (unsigned long)pc; + gdb_regs[_RSP] = (unsigned long)rsp; + gdb_regs[_RBP] = (unsigned long)rbp; + } while (count++ < 16); + return; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* returns nonzero if any memory access fails. */ +int mem2hex( char* mem, char* buf, int count) +{ + int i; + unsigned char ch; + int ret = 0; + + for (i=0;i> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (ret) { + Dearly_printk("mem2hex: fault at accessing %p\n", mem); + } + return(ret); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return nonzero if any memory access fails. */ +int hex2mem( char* buf, char* mem, int count) +{ + int i; + unsigned char ch; + int ret = 0; + + for (i=0;i (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { + addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); + } + *addr = val; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* return a pointer to the last char put in buf (null) */ +/* If MAY_FAULT is non-zero, then we should set mem_err in response to + a fault; if zero treat a fault like any other fault in the stub. */ +char * +mem2hex(char *mem, char *buf, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + /* printk("%lx = ", mem) ; */ + + ch = get_char(mem++); + + /* printk("%02x\n", ch & 0xFF) ; */ + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault fetching from addr %lx\n", + (long) (mem - 1)); + *buf = 0; /* truncate buffer */ + return (buf); + } + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (may_fault) + mem_err_expected = 0; + return (buf); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return a pointer to the character AFTER the last byte written */ +/* NOTE: We use the may fault flag to also indicate if the write is to + * the registers (0) or "other" memory (!=0) + */ +char * +hex2mem(char *buf, char *mem, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + ch = hex(*buf++) << 4; + ch = ch + hex(*buf++); + set_char(mem++, ch, may_fault); + + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault storing to addr %lx\n", + (long) (mem - 1)); + return (mem); + } + } + if (may_fault) + mem_err_expected = 0; + return (mem); +} +#endif + +/**********************************************/ +/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ +/* RETURN NUMBER OF CHARS PROCESSED */ +/**********************************************/ +int +hexToLong(char **ptr, unsigned long *value) +{ + int numChars = 0; + int hexValue; + + *value = 0; + + while (**ptr) { + hexValue = hex(**ptr); + if (hexValue >= 0) { + *value = (*value << 4) | hexValue; + numChars++; + } else + break; + + (*ptr)++; + } + + return (numChars); +} + +#define stubhex(h) hex(h) +#ifdef old_thread_list + +static int +stub_unpack_int(char *buff, int fieldlength) +{ + int nibble; + int retval = 0; + + while (fieldlength) { + nibble = stubhex(*buff++); + retval |= nibble; + fieldlength--; + if (fieldlength) + retval = retval << 4; + } + return retval; +} +#endif +static char * +pack_hex_byte(char *pkt, int byte) +{ + *pkt++ = hexchars[(byte >> 4) & 0xf]; + *pkt++ = hexchars[(byte & 0xf)]; + return pkt; +} + +#define BUF_THREAD_ID_SIZE 16 + +static char * +pack_threadid(char *pkt, threadref * id) +{ + char *limit; + unsigned char *altid; + + altid = (unsigned char *) id; + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *altid++); + return pkt; +} + +#ifdef old_thread_list +static char * +unpack_byte(char *buf, int *value) +{ + *value = stub_unpack_int(buf, 2); + return buf + 2; +} + +static char * +unpack_threadid(char *inbuf, threadref * id) +{ + char *altref; + char *limit = inbuf + BUF_THREAD_ID_SIZE; + int x, y; + + altref = (char *) id; + + while (inbuf < limit) { + x = stubhex(*inbuf++); + y = stubhex(*inbuf++); + *altref++ = (x << 4) | y; + } + return inbuf; +} +#endif +void +int_to_threadref(threadref * id, int value) +{ + unsigned char *scan; + + scan = (unsigned char *) id; + { + int i = 4; + while (i--) + *scan++ = 0; + } + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} +int +int_to_hex_v(unsigned char * id, int value) +{ + unsigned char *start = id; + int shift; + int ch; + + for (shift = 28; shift >= 0; shift -= 4) { + if ((ch = (value >> shift) & 0xf) || (id != start)) { + *id = hexchars[ch]; + id++; + } + } + if (id == start) + *id++ = '0'; + return id - start; +} +#ifdef old_thread_list + +static int +threadref_to_int(threadref * ref) +{ + int i, value = 0; + unsigned char *scan; + + scan = (char *) ref; + scan += 4; + i = 4; + while (i-- > 0) + value = (value << 8) | ((*scan++) & 0xff); + return value; +} +#endif +static int +cmp_str(char *s1, char *s2, int count) +{ + while (count--) { + if (*s1++ != *s2++) + return 0; + } + return 1; +} + +#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ +extern struct task_struct *kgdb_get_idle(int cpu); +#define idle_task(cpu) kgdb_get_idle(cpu) +#else +#define idle_task(cpu) init_tasks[cpu] +#endif + +extern int kgdb_pid_init_done; + +struct task_struct * +getthread(int pid) +{ + struct task_struct *thread; + if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { + if (!cpu_online(pid - PID_MAX)) + return NULL; + + return idle_task(pid - PID_MAX); + } else { + /* + * find_task_by_pid is relatively safe all the time + * Other pid functions require lock downs which imply + * that we may be interrupting them (as we get here + * in the middle of most any lock down). + * Still we don't want to call until the table exists! + */ + if (kgdb_pid_init_done){ + thread = find_task_by_pid(pid); + if (thread) { + return thread; + } + } + } + return NULL; +} +/* *INDENT-OFF* */ +struct hw_breakpoint { + unsigned enabled; + unsigned type; + unsigned len; + unsigned long addr; +} breakinfo[4] = { {enabled:0}, + {enabled:0}, + {enabled:0}, + {enabled:0}}; +/* *INDENT-ON* */ +unsigned long hw_breakpoint_status; +void +correct_hw_break(void) +{ + int breakno; + int correctit; + int breakbit; + unsigned long dr7; + + asm volatile ("movq %%db7, %0\n":"=r" (dr7) + :); + /* *INDENT-OFF* */ + do { + unsigned long addr0, addr1, addr2, addr3; + asm volatile ("movq %%db0, %0\n" + "movq %%db1, %1\n" + "movq %%db2, %2\n" + "movq %%db3, %3\n" + :"=r" (addr0), "=r"(addr1), + "=r"(addr2), "=r"(addr3) + :); + } while (0); + /* *INDENT-ON* */ + correctit = 0; + for (breakno = 0; breakno < 3; breakno++) { + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + dr7 |= (((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << 16) << + (breakno << 2); + switch (breakno) { + case 0: + asm volatile ("movq %0, %%dr0\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 1: + asm volatile ("movq %0, %%dr1\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 2: + asm volatile ("movq %0, %%dr2\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 3: + asm volatile ("movq %0, %%dr3\n"::"r" + (breakinfo[breakno].addr)); + break; + } + } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + } + } + if (correctit) { + asm volatile ("movq %0, %%db7\n"::"r" (dr7)); + } +} + +int +remove_hw_break(unsigned breakno) +{ + if (!breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 0; + return 0; +} + +int +set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) +{ + if (breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 1; + breakinfo[breakno].type = type; + breakinfo[breakno].len = len; + breakinfo[breakno].addr = addr; + return 0; +} + +#ifdef CONFIG_SMP +static int in_kgdb_console = 0; + +int +in_kgdb(struct pt_regs *regs) +{ + unsigned long flags; + int cpu; + if (!kgdb_enabled) + return 0; + cpu = smp_processor_id(); + in_kgdb_called = 1; + if (!spin_is_locked(&kgdb_spinlock)) { + if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ + in_kgdb_console) { /* or we are doing slow i/o */ + return 1; + } + return 0; + } + + /* As I see it the only reason not to let all cpus spin on + * the same spin_lock is to allow selected ones to proceed. + * This would be a good thing, so we leave it this way. + * Maybe someday.... Done ! + + * in_kgdb() is called from an NMI so we don't pretend + * to have any resources, like printk() for example. + */ + + local_irq_save(flags); /* only local here, to avoid hanging */ + /* + * log arival of this cpu + * The NMI keeps on ticking. Protect against recurring more + * than once, and ignor the cpu that has the kgdb lock + */ + in_kgdb_entry_log[cpu]++; + in_kgdb_here_log[cpu] = regs; + if (cpu == spinlock_cpu || waiting_cpus[cpu].task) + goto exit_in_kgdb; + + /* + * For protection of the initilization of the spin locks by kgdb + * it locks the kgdb spinlock before it gets the wait locks set + * up. We wait here for the wait lock to be taken. If the + * kgdb lock goes away first?? Well, it could be a slow exit + * sequence where the wait lock is removed prior to the kgdb lock + * so if kgdb gets unlocked, we just exit. + */ + + while (spin_is_locked(&kgdb_spinlock) && + !spin_is_locked(waitlocks + cpu)) ; + if (!spin_is_locked(&kgdb_spinlock)) + goto exit_in_kgdb; + + waiting_cpus[cpu].task = current; + waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu); + waiting_cpus[cpu].regs = regs; + + spin_unlock_wait(waitlocks + cpu); + + /* + * log departure of this cpu + */ + waiting_cpus[cpu].task = 0; + waiting_cpus[cpu].pid = 0; + waiting_cpus[cpu].regs = 0; + correct_hw_break(); + exit_in_kgdb: + in_kgdb_here_log[cpu] = 0; + local_irq_restore(flags); + return 1; + /* + spin_unlock(continuelocks + smp_processor_id()); + */ +} + +void +smp__in_kgdb(struct pt_regs regs) +{ + ack_APIC_irq(); + in_kgdb(®s); +} +#else +int +in_kgdb(struct pt_regs *regs) +{ + return (kgdb_spinlock); +} +#endif + +void +printexceptioninfo(int exceptionNo, int errorcode, char *buffer) +{ + unsigned long dr6; + int i; + switch (exceptionNo) { + case 1: /* debug exception */ + break; + case 3: /* breakpoint */ + sprintf(buffer, "Software breakpoint"); + return; + default: + sprintf(buffer, "Details not available"); + return; + } + asm volatile ("movq %%db6, %0\n":"=r" (dr6) + :); + if (dr6 & 0x4000) { + sprintf(buffer, "Single step"); + return; + } + for (i = 0; i < 4; ++i) { + if (dr6 & (1 << i)) { + sprintf(buffer, "Hardware breakpoint %d", i); + return; + } + } + sprintf(buffer, "Unknown trap"); + return; +} + +/* + * The ThreadExtraInfo query allows us to pass an arbitrary string + * for display with the "info threads" command. + */ + +void +print_extra_info(task_t *p, char *buf) +{ + if (!p) { + sprintf(buf, "Invalid thread"); + return; + } + sprintf(buf, "0x%p %8d %4d %c %s\n", + (void *)p, p->parent->pid, + task_cpu(p), + (p->state == 0) ? (task_curr(p)?'R':'r') : + (p->state < 0) ? 'U' : + (p->state & TASK_UNINTERRUPTIBLE) ? 'D' : + (p->state & TASK_STOPPED || p->ptrace & PT_PTRACED) ? 'T' : + (p->state & (TASK_ZOMBIE | TASK_DEAD)) ? 'Z' : + (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?', + p->comm); +} + +/* + * This function does all command procesing for interfacing to gdb. + * + * NOTE: The INT nn instruction leaves the state of the interrupt + * enable flag UNCHANGED. That means that when this routine + * is entered via a breakpoint (INT 3) instruction from code + * that has interrupts enabled, then interrupts will STILL BE + * enabled when this routine is entered. The first thing that + * we do here is disable interrupts so as to prevent recursive + * entries and bothersome serial interrupts while we are + * trying to run the serial port in polled mode. + * + * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so + * it is always necessary to do a restore_flags before returning + * so as to let go of that lock. + */ +int +kgdb_handle_exception(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs) +{ + struct task_struct *usethread = NULL; + struct task_struct *thread_list_start = 0, *thread = NULL; + struct task_struct *p; + unsigned long addr, length; + unsigned long breakno, breaktype; + char *ptr; + unsigned long newPC; + threadref thref; + unsigned long threadid, tmpid; + int thread_min = PID_MAX + MAX_NO_CPUS; +#ifdef old_thread_list + int maxthreads; +#endif + int nothreads; + unsigned long flags; + unsigned long gdb_regs[NUMREGS]; + unsigned long dr6; + IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ +#define NO_NMI 1 +#define NO_SYNC 2 +#define regs (*linux_regs) + /* + * If the entry is not from the kernel then return to the Linux + * trap handler and let it process the interrupt normally. + */ + if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->cs)) { + printk("ignoring non-kernel exception\n"); + print_regs(®s); + return (0); + } + /* + * If we're using eth mode, set the 'mode' in the netdevice. + */ + + if (kgdboe) + netpoll_set_trap(1); + + local_irq_save(flags); + + /* Get kgdb spinlock */ + + KGDB_SPIN_LOCK(&kgdb_spinlock); + rdtscll(kgdb_info.entry_tsc); + /* + * We depend on this spinlock and the NMI watch dog to control the + * other cpus. They will arrive at "in_kgdb()" as a result of the + * NMI and will wait there for the following spin locks to be + * released. + */ +#ifdef CONFIG_SMP + +#if 0 + if (cpu_callout_map & ~MAX_CPU_MASK) { + printk("kgdb : too many cpus, possibly not mapped" + " in contiguous space, change MAX_NO_CPUS" + " in kgdb_stub and make new kernel.\n" + " cpu_callout_map is %lx\n", cpu_callout_map); + goto exit_just_unlock; + } +#endif + if (spinlock_count == 1) { + int time, end_time, dum; + int i; + int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) + }; + if (remote_debug) { + printk("kgdb : cpu %d entry, syncing others\n", + smp_processor_id()); + } + for (i = 0; i < MAX_NO_CPUS; i++) { + /* + * Use trylock as we may already hold the lock if + * we are holding the cpu. Net result is all + * locked. + */ + spin_trylock(&waitlocks[i]); + } + for (i = 0; i < MAX_NO_CPUS; i++) + cpu_logged_in[i] = 0; + /* + * Wait for their arrival. We know the watch dog is active if + * in_kgdb() has ever been called, as it is always called on a + * watchdog tick. + */ + rdtsc(dum, time); + end_time = time + 2; /* Note: we use the High order bits! */ + i = 1; + if (num_online_cpus() > 1) { + int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; + smp_send_nmi_allbutself(); + + while (i < num_online_cpus() && time != end_time) { + int j; + for (j = 0; j < MAX_NO_CPUS; j++) { + if (waiting_cpus[j].task && + waiting_cpus[j].task != NOCPU && + !cpu_logged_in[j]) { + i++; + cpu_logged_in[j] = 1; + if (remote_debug) { + printk + ("kgdb : cpu %d arrived at kgdb\n", + j); + } + break; + } else if (!waiting_cpus[j].task && + !cpu_online(j)) { + waiting_cpus[j].task = NOCPU; + cpu_logged_in[j] = 1; + waiting_cpus[j].hold = 1; + break; + } + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + + int wait = 100000; + while (wait--) ; + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + printk + ("kgdb : cpu %d stall" + " in in_kgdb\n", + j); + i++; + cpu_logged_in[j] = 1; + waiting_cpus[j].task = + (struct task_struct + *) 1; + } + } + } + + if (in_kgdb_entry_log[smp_processor_id()] > + (me_in_kgdb + 10)) { + break; + } + + rdtsc(dum, time); + } + if (i < num_online_cpus()) { + printk + ("kgdb : time out, proceeding without sync\n"); +#if 0 + printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", + waiting_cpus[0].task != 0, + waiting_cpus[1].task != 0); + printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", + cpu_logged_in[0], cpu_logged_in[1]); + printk + ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", + in_kgdb_here_log[0] != 0, + in_kgdb_here_log[1] != 0); +#endif + entry_state = NO_SYNC; + } else { +#if 0 + int ent = + in_kgdb_entry_log[smp_processor_id()] - + me_in_kgdb; + printk("kgdb : sync after %d entries\n", ent); +#endif + } + } else { + if (remote_debug) { + printk + ("kgdb : %d cpus, but watchdog not active\n" + "proceeding without locking down other cpus\n", + num_online_cpus()); + entry_state = NO_NMI; + } + } + } +#endif + + if (remote_debug) { + unsigned long *lp = (unsigned long *) &linux_regs; + + printk("handle_exception(exceptionVector=%d, " + "signo=%d, err_code=%d, linux_regs=%p)\n", + exceptionVector, signo, err_code, linux_regs); + if (debug_regs) { + print_regs(®s); + printk("Stk: %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[0], lp[1], lp[2], lp[3], + lp[4], lp[5], lp[6], lp[7]); + printk(" %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[8], lp[9], lp[10], lp[11], + lp[12], lp[13], lp[14], lp[15]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[16], lp[17], lp[18], lp[19], + lp[20], lp[21], lp[22], lp[23]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[24], lp[25], lp[26], lp[27], + lp[28], lp[29], lp[30], lp[31]); + } + } + + /* Disable hardware debugging while we are in kgdb */ + /* Get the debug register status register */ +/* *INDENT-OFF* */ + __asm__("movq %0,%%db7" + : /* no output */ + :"r"(0UL)); + + asm volatile ("movq %%db6, %0\n" + :"=r" (hw_breakpoint_status) + :); + +#if 0 +/* *INDENT-ON* */ + switch (exceptionVector) { + case 0: /* divide error */ + case 1: /* debug exception */ + case 2: /* NMI */ + case 3: /* breakpoint */ + case 4: /* overflow */ + case 5: /* bounds check */ + case 6: /* invalid opcode */ + case 7: /* device not available */ + case 8: /* double fault (errcode) */ + case 10: /* invalid TSS (errcode) */ + case 12: /* stack fault (errcode) */ + case 16: /* floating point error */ + case 17: /* alignment check (errcode) */ + default: /* any undocumented */ + break; + case 11: /* segment not present (errcode) */ + case 13: /* general protection (errcode) */ + case 14: /* page fault (special errcode) */ + case 19: /* cache flush denied */ + if (mem_err_expected) { + /* + * This fault occured because of the + * get_char or set_char routines. These + * two routines use either eax of edx to + * indirectly reference the location in + * memory that they are working with. + * For a page fault, when we return the + * instruction will be retried, so we + * have to make sure that these + * registers point to valid memory. + */ + mem_err = 1; /* set mem error flag */ + mem_err_expected = 0; + mem_err_cnt++; /* helps in debugging */ + /* make valid address */ + regs.eax = (long) &garbage_loc; + /* make valid address */ + regs.edx = (long) &garbage_loc; + if (remote_debug) + printk("Return after memory error: " + "mem_err_cnt=%d\n", mem_err_cnt); + if (debug_regs) + print_regs(®s); + goto exit_kgdb; + } + break; + } +#endif + if (remote_debug) + printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); + + gdb_i386vector = exceptionVector; + gdb_i386errcode = err_code; + kgdb_info.called_from = __builtin_return_address(0); +#ifdef CONFIG_SMP + /* + * OK, we can now communicate, lets tell gdb about the sync. + * but only if we had a problem. + */ + switch (entry_state) { + case NO_NMI: + to_gdb("NMI not active, other cpus not stopped\n"); + break; + case NO_SYNC: + to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); + default:; + } + +#endif +/* + * Set up the gdb function call area. + */ + trap_cpu = smp_processor_id(); + OLD_esp = NEW_esp = (unsigned long) (&linux_regs->rsp); + + IF_SMP(once_again:) + /* reply to host that an exception has occurred */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + + putpacket(remcomOutBuffer); + + while (1 == 1) { + error = 0; + remcomOutBuffer[0] = 0; + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + break; + case 'd': + remote_debug = !(remote_debug); /* toggle debug flag */ + printk("Remote debug %s\n", + remote_debug ? "on" : "off"); + break; + case 'g': /* return the value of the CPU registers */ + get_gdb_regs(usethread, ®s, gdb_regs); + mem2hex((char *) gdb_regs, + remcomOutBuffer, NUMREGBYTES); + break; + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem(&remcomInBuffer[1], + (char *) gdb_regs, NUMREGBYTES); + if (!usethread || usethread == current) { + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E00"); + } + break; + + case 'P':{ /* set the value of a single CPU register - + return OK */ + /* + * For some reason, gdb wants to talk about psudo + * registers (greater than 15). + */ + unsigned long regno; + + ptr = &remcomInBuffer[1]; + regs_to_gdb_regs(gdb_regs, ®s); + if ((!usethread || usethread == current) && + hexToLong(&ptr, ®no) && + *ptr++ == '=' && (regno >= 0)) { + if (regno >= NUMREGS) + break; + hex2mem(ptr, (char *) &gdb_regs[regno], + 8); + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + break; + } + strcpy(remcomOutBuffer, "E01"); + break; + } + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr, &addr) && + (*(ptr++) == ',') && (hexToLong(&ptr, &length))) { + ptr = 0; + /* + * hex doubles the byte count + */ + if (length > (BUFMAX / 2)) + length = BUFMAX / 2; + if (mem2hex((char *) addr, + remcomOutBuffer, length)) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } + } + + if (ptr) { + strcpy(remcomOutBuffer, "E01"); + debug_error + ("malformed read memory command: %s\n", + remcomInBuffer); + } + break; + + /* MAA..AA,LLLL: + Write LLLL bytes at address AA.AA return OK */ + case 'M': + /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr, &addr) && + (*(ptr++) == ',') && + (hexToLong(&ptr, &length)) && (*(ptr++) == ':')) { + if (hex2mem(ptr, (char *) addr, length)) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } else { + strcpy(remcomOutBuffer, "OK"); + } + + ptr = 0; + } + if (ptr) { + strcpy(remcomOutBuffer, "E02"); + debug_error + ("malformed write memory command: %s\n", + remcomInBuffer); + } + break; + case 'S': + remcomInBuffer[0] = 's'; + case 'C': + /* Csig;AA..AA where ;AA..AA is optional + * continue with signal + * Since signals are meaning less to us, delete that + * part and then fall into the 'c' code. + */ + ptr = &remcomInBuffer[1]; + length = 2; + while (*ptr && *ptr != ';') { + length++; + ptr++; + } + if (*ptr) { + do { + ptr++; + *(ptr - length++) = *ptr; + } while (*ptr); + } else { + remcomInBuffer[1] = 0; + } + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + /* D detach, reply OK and then continue */ + case 'c': + case 's': + case 'D': + + /* try to read optional parameter, + pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr, &addr)) { + if (remote_debug) + printk("Changing EIP to 0x%lx\n", addr); + + regs.rip = addr; + } + + newPC = regs.rip; + + /* clear the trace bit */ + regs.eflags &= 0xfffffeff; + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + regs.eflags |= 0x100; + + /* detach is a friendly version of continue. Note that + debugging is still enabled (e.g hit control C) + */ + if (remcomInBuffer[0] == 'D') { + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + } + + if (remote_debug) { + printk("Resuming execution\n"); + print_regs(®s); + } + asm volatile ("movq %%db6, %0\n":"=r" (dr6) + :); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno) && + (breakinfo[breakno].type == 0)) { + /* Set restore flag */ + regs.eflags |= 0x10000; + break; + } + } + } + + if (kgdboe) + netpoll_set_trap(0); + + correct_hw_break(); + asm volatile ("movq %0, %%db6\n"::"r" (0UL)); + goto exit_kgdb; + + /* kill the program */ + case 'k': /* do nothing */ + break; + + /* query */ + case 'q': + nothreads = 0; + switch (remcomInBuffer[1]) { + case 'f': + threadid = 1; + thread_list = 2; + thread_list_start = (usethread ? : current); + case 's': + if (!cmp_str(&remcomInBuffer[2], + "ThreadInfo", 10)) + break; + + remcomOutBuffer[nothreads++] = 'm'; + for (; threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + nothreads += int_to_hex_v( + &remcomOutBuffer[ + nothreads], + threadid); + if (thread_min > threadid) + thread_min = threadid; + remcomOutBuffer[ + nothreads] = ','; + nothreads++; + if (nothreads > BUFMAX - 10) + break; + } + } + if (remcomOutBuffer[nothreads - 1] == 'm') { + remcomOutBuffer[nothreads - 1] = 'l'; + } else { + nothreads--; + } + remcomOutBuffer[nothreads] = 0; + break; + +#ifdef old_thread_list /* Old thread info request */ + case 'L': + /* List threads */ + thread_list = 2; + thread_list_start = (usethread ? : current); + unpack_byte(remcomInBuffer + 3, &maxthreads); + unpack_threadid(remcomInBuffer + 5, &thref); + do { + int buf_thread_limit = + (BUFMAX - 22) / BUF_THREAD_ID_SIZE; + if (maxthreads > buf_thread_limit) { + maxthreads = buf_thread_limit; + } + } while (0); + remcomOutBuffer[0] = 'q'; + remcomOutBuffer[1] = 'M'; + remcomOutBuffer[4] = '0'; + pack_threadid(remcomOutBuffer + 5, &thref); + + /* If start flag set start at 0. */ + if (remcomInBuffer[2] == '1') + threadid = 0; + else + threadid = threadref_to_int(&thref); + for (nothreads = 0; + nothreads < maxthreads && + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(remcomOutBuffer + + 21 + + nothreads * 16, + &thref); + nothreads++; + if (thread_min > threadid) + thread_min = threadid; + } + } + + if (threadid == PID_MAX + MAX_NO_CPUS) { + remcomOutBuffer[4] = '1'; + } + pack_hex_byte(remcomOutBuffer + 2, nothreads); + remcomOutBuffer[21 + nothreads * 16] = '\0'; + break; +#endif + case 'C': + /* Current thread id */ + remcomOutBuffer[0] = 'Q'; + remcomOutBuffer[1] = 'C'; + threadid = current->pid; + if (!threadid) { + /* + * idle thread + */ + for (threadid = PID_MAX; + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + if (current == + idle_task(threadid - + PID_MAX)) + break; + } + } + int_to_threadref(&thref, threadid); + pack_threadid(remcomOutBuffer + 2, &thref); + remcomOutBuffer[18] = '\0'; + break; + + case 'E': + /* Print exception info */ + printexceptioninfo(exceptionVector, + err_code, remcomOutBuffer); + break; + case 'T': + ptr = &remcomInBuffer[0]; + if (strncmp(ptr, "qThreadExtraInfo,", + strlen("qThreadExtraInfo,")) == 0) { + ptr += strlen("qThreadExtraInfo,"); + hexToLong(&ptr, &tmpid); + p = getthread(tmpid); + print_extra_info(p, lbuf); + mem2hex(lbuf, remcomOutBuffer, + strlen(lbuf)); + } + break; +#if 0 + case 'T':{ + char * nptr; + /* Thread extra info */ + if (!cmp_str(&remcomInBuffer[2], + "hreadExtraInfo,", 15)) { + break; + } + ptr = &remcomInBuffer[17]; + hexToLong(&ptr, &threadid); + thread = getthread(threadid); + nptr = &thread->comm[0]; + length = 0; + ptr = &remcomOutBuffer[0]; + do { + length++; + ptr = pack_hex_byte(ptr, *nptr++); + } while (*nptr && length < 16); + /* + * would like that 16 to be the size of + * task_struct.comm but don't know the + * syntax.. + */ + *ptr = 0; + } +#endif + } + break; + + /* task related */ + case 'H': + switch (remcomInBuffer[1]) { + case 'g': + ptr = &remcomInBuffer[2]; + hexToLong(&ptr, &threadid); + thread = getthread(threadid); + if (!thread) { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + break; + } + /* + * Just in case I forget what this is all about, + * the "thread info" command to gdb causes it + * to ask for a thread list. It then switches + * to each thread and asks for the registers. + * For this (and only this) usage, we want to + * fudge the registers of tasks not on the run + * list (i.e. waiting) to show the routine that + * called schedule. Also, gdb, is a minimalist + * in that if the current thread is the last + * it will not re-read the info when done. + * This means that in this case we must show + * the real registers. So here is how we do it: + * Each entry we keep track of the min + * thread in the list (the last that gdb will) + * get info for. We also keep track of the + * starting thread. + * "thread_list" is cleared when switching back + * to the min thread if it is was current, or + * if it was not current, thread_list is set + * to 1. When the switch to current comes, + * if thread_list is 1, clear it, else do + * nothing. + */ + usethread = thread; + if ((thread_list == 1) && + (thread == thread_list_start)) { + thread_list = 0; + } + if (thread_list && (threadid == thread_min)) { + if (thread == thread_list_start) { + thread_list = 0; + } else { + thread_list = 1; + } + } + /* follow through */ + case 'c': + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcomInBuffer[1]; + hexToLong(&ptr, &threadid); + thread = getthread(threadid); + if (thread) { + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + if (thread_min > threadid) + thread_min = threadid; + } else { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + } + break; + + case 'Y': /* set up a hardware breakpoint */ + ptr = &remcomInBuffer[1]; + hexToLong(&ptr, &breakno); + ptr++; + hexToLong(&ptr, &breaktype); + ptr++; + hexToLong(&ptr, &length); + ptr++; + hexToLong(&ptr, &addr); + if (set_hw_break(breakno & 0x3, + breaktype & 0x3, + length & 0x3, addr) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + /* Remove hardware breakpoint */ + case 'y': + ptr = &remcomInBuffer[1]; + hexToLong(&ptr, &breakno); + if (remove_hw_break(breakno & 0x3) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + case 'r': /* reboot */ + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + /*to_gdb("Rebooting\n"); */ + /* triplefault no return from here */ + { + static long no_idt[2]; + __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); + BREAKPOINT; + } + + } /* switch */ + + /* reply to the request */ + putpacket(remcomOutBuffer); + } /* while(1==1) */ + /* + * reached by goto only. + */ + exit_kgdb: + /* + * Here is where we set up to trap a gdb function call. NEW_esp + * will be changed if we are trying to do this. We handle both + * adding and subtracting, thus allowing gdb to put grung on + * the stack which it removes later. + */ + if (NEW_esp != OLD_esp) { + unsigned long *ptr = END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) + ptr -= (OLD_esp - NEW_esp) / sizeof (unsigned long); + *--ptr = linux_regs->eflags; + *--ptr = linux_regs->cs; + *--ptr = linux_regs->rip; + *--ptr = linux_regs->rcx; + *--ptr = linux_regs->rbx; + *--ptr = linux_regs->rax; + linux_regs->rcx = NEW_esp - (sizeof (unsigned long) * 6); + linux_regs->rbx = (unsigned long) END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) { + linux_regs->rip = (unsigned long) fn_call_stub; + } else { + linux_regs->rip = (unsigned long) fn_rtn_stub; + linux_regs->rax = NEW_esp; + } + linux_regs->eflags &= ~(IF_BIT | TF_BIT); + } +#ifdef CONFIG_SMP + /* + * Release gdb wait locks + * Sanity check time. Must have at least one cpu to run. Also single + * step must not be done if the current cpu is on hold. + */ + if (spinlock_count == 1) { + int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; + int cpu_avail = 0; + int i; + + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!cpu_online(i)) + break; + if (!hold_cpu(i)) { + cpu_avail = 1; + } + } + /* + * Early in the bring up there will be NO cpus on line... + */ + if (!cpu_avail && !cpus_empty(cpu_online_map)) { + to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); + goto once_again; + } + if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { + to_gdb + ("Current cpu must be unblocked to single step\n"); + goto once_again; + } + if (!(ss_hold)) { + int i; + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!hold_cpu(i)) { + spin_unlock(&waitlocks[i]); + } + } + } else { + spin_unlock(&waitlocks[smp_processor_id()]); + } + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + /* + * If this cpu is on hold, this is where we + * do it. Note, the NMI will pull us out of here, + * but will return as the above lock is not held. + * We will stay here till another cpu releases the lock for us. + */ + spin_unlock_wait(waitlocks + smp_processor_id()); + local_irq_restore(flags); + return (1); + } +#if 0 +exit_just_unlock: +#endif +#endif + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + local_irq_restore(flags); + return (1); +} + +#undef regs +static int kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + struct die_args *d = ptr; + + if (!kgdb_enabled || (cmd == DIE_DEBUG && user_mode(d->regs))) + return NOTIFY_DONE; + if (cmd == DIE_NMI_IPI) { + if (in_kgdb(d->regs)) + return NOTIFY_BAD; + } else if (kgdb_handle_exception(d->trapnr, d->signr, d->err, d->regs)) + return NOTIFY_BAD; /* skip */ + + return NOTIFY_DONE; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, + .priority = 0, +}; + +void set_debug_traps(void) +{ + static int initialized = 0; + + if (!initialized) { + initialized = 1; + notifier_chain_register(&die_chain, &kgdb_notifier); + } +} + +/* + * Provide the command line "gdb" initial break + */ +int __init kgdb_initial_break(char * str) +{ + if (*str == '\0'){ + breakpoint(); + return 1; + } + return 0; +} +__setup("gdb",kgdb_initial_break); + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. */ +/* But really, just use the BREAKPOINT macro. We will handle the int stuff + */ + +void breakpoint(void) +{ + + set_debug_traps(); + kgdb_enabled = 1; +#if 0 + /* + * These calls were not enough to allow breakpoint to be + * called before trap_init(). I moved the argument parsing + * after trap_init() and it seems to work. + */ + set_intr_usr_gate(3,&int3); /* disable ints on trap */ + set_intr_gate(1,&debug); + set_intr_gate(14,&page_fault); +#endif + + BREAKPOINT; +} + +#ifdef later +/* + * possibly we should not go thru the traps.c code at all? Someday. + */ +void +do_kgdb_int3(struct pt_regs *regs, long error_code) +{ + kgdb_handle_exception(3, 5, error_code, regs); + return; +} +#endif +#undef regs +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS +asmlinkage void +bad_sys_call_exit(int stuff) +{ + struct pt_regs *regs = (struct pt_regs *) &stuff; + printk("Sys call %d return with %x preempt_count\n", + (int) regs->orig_eax, preempt_count()); +} +#endif +#ifdef CONFIG_STACK_OVERFLOW_TEST +#include +asmlinkage void +stack_overflow(void) +{ +#ifdef BREAKPOINT + BREAKPOINT; +#else + printk("Kernel stack overflow, looping forever\n"); +#endif + while (1) { + } +} +#endif + +#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) +char gdbconbuf[BUFMAX]; + +static void +kgdb_gdb_message(const char *s, unsigned count) +{ + int i; + int wcount; + char *bufptr; + /* + * This takes care of NMI while spining out chars to gdb + */ + IF_SMP(in_kgdb_console = 1); + gdbconbuf[0] = 'O'; + bufptr = gdbconbuf + 1; + while (count > 0) { + if ((count << 1) > (BUFMAX - 2)) { + wcount = (BUFMAX - 2) >> 1; + } else { + wcount = count; + } + count -= wcount; + for (i = 0; i < wcount; i++) { + bufptr = pack_hex_byte(bufptr, s[i]); + } + *bufptr = '\0'; + s += wcount; + + putpacket(gdbconbuf); + + } + IF_SMP(in_kgdb_console = 0); +} +#endif +#ifdef CONFIG_SMP +static void +to_gdb(const char *s) +{ + int count = 0; + while (s[count] && (count++ < BUFMAX)) ; + kgdb_gdb_message(s, count); +} +#endif +#ifdef CONFIG_KGDB_CONSOLE +#include +#include +#include +#include + +void +kgdb_console_write(struct console *co, const char *s, unsigned count) +{ + + if (gdb_i386vector == -1) { + /* + * We have not yet talked to gdb. What to do... + * lets break, on continue we can do the write. + * But first tell him whats up. Uh, well no can do, + * as this IS the console. Oh well... + * We do need to wait or the messages will be lost. + * Other option would be to tell the above code to + * ignore this breakpoint and do an auto return, + * but that might confuse gdb. Also this happens + * early enough in boot up that we don't have the traps + * set up yet, so... + */ + breakpoint(); + } + kgdb_gdb_message(s, count); +} + +/* + * ------------------------------------------------------------ + * Serial KGDB driver + * ------------------------------------------------------------ + */ + +static struct console kgdbcons = { + name:"kgdb", + write:kgdb_console_write, +#ifdef CONFIG_KGDB_USER_CONSOLE + device:kgdb_console_device, +#endif + flags:CON_PRINTBUFFER | CON_ENABLED, + index:-1, +}; + +/* + * The trick here is that this file gets linked before printk.o + * That means we get to peer at the console info in the command + * line before it does. If we are up, we register, otherwise, + * do nothing. By returning 0, we allow printk to look also. + */ +static int kgdb_console_enabled; + +int __init +kgdb_console_init(char *str) +{ + if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { + register_console(&kgdbcons); + kgdb_console_enabled = 1; + } + return 0; /* let others look at the string */ +} + +__setup("console=", kgdb_console_init); + +#ifdef CONFIG_KGDB_USER_CONSOLE +static kdev_t kgdb_console_device(struct console *c); +/* This stuff sort of works, but it knocks out telnet devices + * we are leaving it here in case we (or you) find time to figure it out + * better.. + */ + +/* + * We need a real char device as well for when the console is opened for user + * space activities. + */ + +static int +kgdb_consdev_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t +kgdb_consdev_write(struct file *file, const char *buf, + size_t count, loff_t * ppos) +{ + int size, ret = 0; + static char kbuf[128]; + static DECLARE_MUTEX(sem); + + /* We are not reentrant... */ + if (down_interruptible(&sem)) + return -ERESTARTSYS; + + while (count > 0) { + /* need to copy the data from user space */ + size = count; + if (size > sizeof (kbuf)) + size = sizeof (kbuf); + if (copy_from_user(kbuf, buf, size)) { + ret = -EFAULT; + break;; + } + kgdb_console_write(&kgdbcons, kbuf, size); + count -= size; + ret += size; + buf += size; + } + + up(&sem); + + return ret; +} + +struct file_operations kgdb_consdev_fops = { + open:kgdb_consdev_open, + write:kgdb_consdev_write +}; +static kdev_t +kgdb_console_device(struct console *c) +{ + return MKDEV(TTYAUX_MAJOR, 1); +} + +/* + * This routine gets called from the serial stub in the i386/lib + * This is so it is done late in bring up (just before the console open). + */ +void +kgdb_console_finit(void) +{ + if (kgdb_console_enabled) { + char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); + char *cp = cptr; + while (*cptr && *cptr != '(') + cptr++; + *cptr = 0; + unregister_chrdev(TTYAUX_MAJOR, cp); + register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); + } +} +#endif +#endif +#ifdef CONFIG_KGDB_TS +#include /* time stamp code */ +#include /* in_interrupt */ +#ifdef CONFIG_KGDB_TS_64 +#define DATA_POINTS 64 +#endif +#ifdef CONFIG_KGDB_TS_128 +#define DATA_POINTS 128 +#endif +#ifdef CONFIG_KGDB_TS_256 +#define DATA_POINTS 256 +#endif +#ifdef CONFIG_KGDB_TS_512 +#define DATA_POINTS 512 +#endif +#ifdef CONFIG_KGDB_TS_1024 +#define DATA_POINTS 1024 +#endif +#ifndef DATA_POINTS +#define DATA_POINTS 128 /* must be a power of two */ +#endif +#define INDEX_MASK (DATA_POINTS - 1) +#if (INDEX_MASK & DATA_POINTS) +#error "CONFIG_KGDB_TS_COUNT must be a power of 2" +#endif +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + int data0; + int data1; +}; +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + struct task_struct *t1; + struct task_struct *t2; +}; +struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; + +struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; +int kgdb_and_then_count; + +void +kgdb_tstamp(int line, char *source, int data0, int data1) +{ + static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; + int flags; + local_irq_save(flags); + spin_lock(&ts_spin); + rdtscll(kgdb_and_then->at_time); +#ifdef CONFIG_SMP + kgdb_and_then->on_cpu = smp_processor_id(); +#endif + kgdb_and_then->task = current; + kgdb_and_then->from_ln = line; + kgdb_and_then->in_src = source; + kgdb_and_then->from = __builtin_return_address(0); + kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | + (preempt_count() << 8)); + kgdb_and_then->data0 = data0; + kgdb_and_then->data1 = data1; + kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; + spin_unlock(&ts_spin); + local_irq_restore(flags); +#ifdef CONFIG_PREEMPT + +#endif + return; +} +#endif +typedef int gdb_debug_hook(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs); +gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ + +static int kgdb_need_breakpoint[NR_CPUS]; + +void kgdb_schedule_breakpoint(void) +{ + kgdb_need_breakpoint[smp_processor_id()] = 1; +} + +void kgdb_process_breakpoint(void) +{ + /* + * Handle a breakpoint queued from inside network driver code + * to avoid reentrancy issues + */ + if (kgdb_need_breakpoint[smp_processor_id()]) { + kgdb_need_breakpoint[smp_processor_id()] = 0; + BREAKPOINT; + } +} + --- linux-2.6.2-rc1/arch/x86_64/kernel/Makefile 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/Makefile 2004-01-21 23:27:00.000000000 -0800 @@ -24,6 +24,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o a obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_KGDB) += kgdb_stub.o obj-y += topology.o --- linux-2.6.2-rc1/arch/x86_64/kernel/mpparse.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/mpparse.c 2004-01-21 23:27:03.000000000 -0800 @@ -951,6 +951,8 @@ void __init mp_parse_prt (void) entry->irq); } + print_IO_APIC(); + return; } --- linux-2.6.2-rc1/arch/x86_64/kernel/nmi.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/arch/x86_64/kernel/nmi.c 2004-01-21 23:30:33.000000000 -0800 @@ -311,11 +311,11 @@ void touch_nmi_watchdog (void) void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) { + int sum, cpu = safe_smp_processor_id(); + if (nmi_watchdog_disabled) return; - int sum, cpu = safe_smp_processor_id(); - sum = read_pda(apic_timer_irqs); if (last_irq_sums[cpu] == sum) { /* --- linux-2.6.2-rc1/arch/x86_64/kernel/pci-dma.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/x86_64/kernel/pci-dma.c 2004-01-21 23:30:01.000000000 -0800 @@ -35,6 +35,7 @@ int pci_map_sg(struct pci_dev *hwdev, st BUG_ON(!s->page); s->dma_address = pci_map_page(hwdev, s->page, s->offset, s->length, direction); + s->dma_length = s->length; } return nents; } @@ -53,7 +54,7 @@ void pci_unmap_sg(struct pci_dev *dev, s struct scatterlist *s = &sg[i]; BUG_ON(s->page == NULL); BUG_ON(s->dma_address == 0); - pci_unmap_single(dev, s->dma_address, s->length, dir); + pci_unmap_single(dev, s->dma_address, s->dma_length, dir); } } --- linux-2.6.2-rc1/arch/x86_64/kernel/pci-gart.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/pci-gart.c 2004-01-21 23:30:33.000000000 -0800 @@ -117,11 +117,11 @@ static unsigned long alloc_iommu(int siz static void free_iommu(unsigned long offset, int size) { + unsigned long flags; if (size == 1) { clear_bit(offset, iommu_gart_bitmap); return; } - unsigned long flags; spin_lock_irqsave(&iommu_bitmap_lock, flags); __clear_bit_string(iommu_gart_bitmap, offset, size); spin_unlock_irqrestore(&iommu_bitmap_lock, flags); @@ -329,6 +329,7 @@ static dma_addr_t pci_map_area(struct pc { unsigned long npages = to_pages(phys_mem, size); unsigned long iommu_page = alloc_iommu(npages); + int i; if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) return phys_mem; @@ -338,7 +339,6 @@ static dma_addr_t pci_map_area(struct pc return bad_dma_address; } - int i; for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); SET_LEAK(iommu_page + i); @@ -382,10 +382,12 @@ static int pci_map_sg_nonforce(struct pc if (i > 0) pci_unmap_sg(dev, sg, i, dir); nents = 0; + sg[0].dma_length = 0; break; } } s->dma_address = addr; + s->dma_length = s->length; } flush_gart(dev); return nents; @@ -396,11 +398,11 @@ static int __pci_map_cont(struct scatter struct scatterlist *sout, unsigned long pages) { unsigned long iommu_start = alloc_iommu(pages); - if (iommu_start == -1) - return -1; - unsigned long iommu_page = iommu_start; int i; + + if (iommu_start == -1) + return -1; for (i = start; i < stopat; i++) { struct scatterlist *s = &sg[i]; @@ -412,8 +414,9 @@ static int __pci_map_cont(struct scatter *sout = *s; sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; + sout->dma_length = s->length; } else { - sout->length += s->length; + sout->dma_length += s->length; } addr = phys_addr; @@ -436,6 +439,7 @@ static inline int pci_map_cont(struct sc if (!need) { BUG_ON(stopat - start != 1); *sout = sg[start]; + sout->dma_length = sg[start].length; return 0; } return __pci_map_cont(sg, start, stopat, sout, pages); @@ -453,8 +457,6 @@ int pci_map_sg(struct pci_dev *dev, stru unsigned long pages = 0; int need = 0, nextneed; - unsigned long size = 0; - BUG_ON(dir == PCI_DMA_NONE); if (nents == 0) return 0; @@ -466,7 +468,6 @@ int pci_map_sg(struct pci_dev *dev, stru s->dma_address = addr; BUG_ON(s->length == 0); - size += s->length; nextneed = need_iommu(dev, addr, s->length); /* Handle the previous not yet processed entries */ @@ -493,7 +494,7 @@ int pci_map_sg(struct pci_dev *dev, stru out++; flush_gart(dev); if (out < nents) - sg[out].length = 0; + sg[out].dma_length = 0; return out; error: @@ -518,12 +519,12 @@ void pci_unmap_single(struct pci_dev *hw { unsigned long iommu_page; int npages; + int i; if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || dma_addr > iommu_bus_base + iommu_size) return; iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; npages = to_pages(dma_addr, size); - int i; for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = 0; CLEAR_LEAK(iommu_page + i); @@ -540,9 +541,9 @@ void pci_unmap_sg(struct pci_dev *dev, s int i; for (i = 0; i < nents; i++) { struct scatterlist *s = &sg[i]; - if (!s->length) + if (!s->dma_length || !s->length) break; - pci_unmap_single(dev, s->dma_address, s->length, dir); + pci_unmap_single(dev, s->dma_address, s->dma_length, dir); } } @@ -800,7 +801,8 @@ fs_initcall(pci_iommu_init); merge Do SG merging. Implies force (experimental) nomerge Don't do SG merging. forcesac For SAC mode for masks <40bits (experimental) - fullflush Flush IOMMU on each allocation (for testing) + fullflush Flush IOMMU on each allocation (default) + nofullflush Don't use IOMMU fullflush */ __init int iommu_setup(char *opt) { @@ -838,6 +840,8 @@ __init int iommu_setup(char *opt) iommu_sac_force = 1; if (!memcmp(p, "fullflush", 9)) iommu_fullflush = 1; + if (!memcmp(p, "nofullflush", 11)) + iommu_fullflush = 0; #ifdef CONFIG_IOMMU_LEAK if (!memcmp(p,"leak", 4)) { leak_trace = 1; --- linux-2.6.2-rc1/arch/x86_64/kernel/process.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/process.c 2004-01-21 23:30:01.000000000 -0800 @@ -166,6 +166,8 @@ void idle_warning(void) printk(KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"); } +EXPORT_SYMBOL(idle_warning); + /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs * regs) { @@ -440,7 +442,6 @@ struct task_struct *__switch_to(struct t write_pda(pcurrent, next_p); write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET); - /* * Now maybe reload the debug registers */ --- linux-2.6.2-rc1/arch/x86_64/kernel/setup.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/setup.c 2004-01-21 23:27:03.000000000 -0800 @@ -65,7 +65,11 @@ unsigned long mmu_cr4_features; EXPORT_SYMBOL_GPL(mmu_cr4_features); int acpi_disabled = 0; -int acpi_ht = 0; + +#ifdef CONFIG_ACPI_BOOT +extern int __initdata acpi_ht; +/* int __initdata acpi_force = 0; */ +#endif /* For PCI or other memory-mapped resources */ unsigned long pci_mem_start = 0x10000000; @@ -195,6 +199,7 @@ static __init void parse_cmdline_early ( if (c != ' ') goto next_char; +#ifdef CONFIG_ACPI_BOOT /* "acpi=off" disables both ACPI table parsing and interpreter init */ if (!memcmp(from, "acpi=off", 8)) acpi_disabled = 1; @@ -210,6 +215,7 @@ static __init void parse_cmdline_early ( if (!memcmp(from, "acpi=ht", 7)) { acpi_ht = 1; } +#endif if (!memcmp(from, "nolapic", 7) || !memcmp(from, "disableapic", 11)) --- linux-2.6.2-rc1/arch/x86_64/kernel/smp.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/x86_64/kernel/smp.c 2004-01-21 23:27:00.000000000 -0800 @@ -362,6 +362,18 @@ void smp_send_reschedule(int cpu) send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } +#ifdef CONFIG_KGDB +/* + * By using the NMI code instead of a vector we just sneak thru the + * word generator coming out with just what we want. AND it does + * not matter if clustered_apic_mode is set or not. + */ +void smp_send_nmi_allbutself(void) +{ + send_IPI_allbutself(APIC_DM_NMI); +} +#endif + /* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. --- linux-2.6.2-rc1/arch/x86_64/kernel/time.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/time.c 2004-01-21 23:30:01.000000000 -0800 @@ -10,6 +10,7 @@ * Copyright (c) 1998 Andrea Arcangeli * Copyright (c) 2002 Vojtech Pavlik * Copyright (c) 2003 Andi Kleen + * RTC support code taken from arch/i386/kernel/timers/time_hpet.c * */ @@ -28,6 +29,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -627,10 +629,10 @@ static int hpet_init(void) * and period also hpet_tick. */ - hpet_writel(HPET_T0_ENABLE | HPET_T0_PERIODIC | HPET_T0_SETVAL | - HPET_T0_32BIT, HPET_T0_CFG); - hpet_writel(hpet_tick, HPET_T0_CMP); + hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | + HPET_TN_32BIT, HPET_T0_CFG); hpet_writel(hpet_tick, HPET_T0_CMP); + hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ /* * Go! @@ -733,3 +735,229 @@ void __init time_init_smp(void) } __setup("report_lost_ticks", time_setup); + +#ifdef CONFIG_HPET_EMULATE_RTC +/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET + * is enabled, we support RTC interrupt functionality in software. + * RTC has 3 kinds of interrupts: + * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock + * is updated + * 2) Alarm Interrupt - generate an interrupt at a specific time of day + * 3) Periodic Interrupt - generate periodic interrupt, with frequencies + * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) + * (1) and (2) above are implemented using polling at a frequency of + * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt + * overhead. (DEFAULT_RTC_INT_FREQ) + * For (3), we use interrupts at 64Hz or user specified periodic + * frequency, whichever is higher. + */ +#include +#include + +extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); + +#define DEFAULT_RTC_INT_FREQ 64 +#define RTC_NUM_INTS 1 + +static unsigned long UIE_on; +static unsigned long prev_update_sec; + +static unsigned long AIE_on; +static struct rtc_time alarm_time; + +static unsigned long PIE_on; +static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ; +static unsigned long PIE_count; + +static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ + +int is_hpet_enabled(void) +{ + return vxtime.hpet_address != 0; +} + +/* + * Timer 1 for RTC, we do not use periodic interrupt feature, + * even if HPET supports periodic interrupts on Timer 1. + * The reason being, to set up a periodic interrupt in HPET, we need to + * stop the main counter. And if we do that everytime someone diables/enables + * RTC, we will have adverse effect on main kernel timer running on Timer 0. + * So, for the time being, simulate the periodic interrupt in software. + * + * hpet_rtc_timer_init() is called for the first time and during subsequent + * interuppts reinit happens through hpet_rtc_timer_reinit(). + */ +int hpet_rtc_timer_init(void) +{ + unsigned int cfg, cnt; + unsigned long flags; + + if (!is_hpet_enabled()) + return 0; + /* + * Set the counter 1 and enable the interrupts. + */ + if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) + hpet_rtc_int_freq = PIE_freq; + else + hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; + + local_irq_save(flags); + cnt = hpet_readl(HPET_COUNTER); + cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); + hpet_writel(cnt, HPET_T1_CMP); + local_irq_restore(flags); + + cfg = hpet_readl(HPET_T1_CFG); + cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; + hpet_writel(cfg, HPET_T1_CFG); + + return 1; +} + +static void hpet_rtc_timer_reinit(void) +{ + unsigned int cfg, cnt; + + if (!(PIE_on | AIE_on | UIE_on)) + return; + + if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) + hpet_rtc_int_freq = PIE_freq; + else + hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; + + /* It is more accurate to use the comparator value than current count.*/ + cnt = hpet_readl(HPET_T1_CMP); + cnt += hpet_tick*HZ/hpet_rtc_int_freq; + hpet_writel(cnt, HPET_T1_CMP); + + cfg = hpet_readl(HPET_T1_CFG); + cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; + hpet_writel(cfg, HPET_T1_CFG); + + return; +} + +/* + * The functions below are called from rtc driver. + * Return 0 if HPET is not being used. + * Otherwise do the necessary changes and return 1. + */ +int hpet_mask_rtc_irq_bit(unsigned long bit_mask) +{ + if (!is_hpet_enabled()) + return 0; + + if (bit_mask & RTC_UIE) + UIE_on = 0; + if (bit_mask & RTC_PIE) + PIE_on = 0; + if (bit_mask & RTC_AIE) + AIE_on = 0; + + return 1; +} + +int hpet_set_rtc_irq_bit(unsigned long bit_mask) +{ + int timer_init_reqd = 0; + + if (!is_hpet_enabled()) + return 0; + + if (!(PIE_on | AIE_on | UIE_on)) + timer_init_reqd = 1; + + if (bit_mask & RTC_UIE) { + UIE_on = 1; + } + if (bit_mask & RTC_PIE) { + PIE_on = 1; + PIE_count = 0; + } + if (bit_mask & RTC_AIE) { + AIE_on = 1; + } + + if (timer_init_reqd) + hpet_rtc_timer_init(); + + return 1; +} + +int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec) +{ + if (!is_hpet_enabled()) + return 0; + + alarm_time.tm_hour = hrs; + alarm_time.tm_min = min; + alarm_time.tm_sec = sec; + + return 1; +} + +int hpet_set_periodic_freq(unsigned long freq) +{ + if (!is_hpet_enabled()) + return 0; + + PIE_freq = freq; + PIE_count = 0; + + return 1; +} + +int hpet_rtc_dropped_irq(void) +{ + if (!is_hpet_enabled()) + return 0; + + return 1; +} + +irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct rtc_time curr_time; + unsigned long rtc_int_flag = 0; + int call_rtc_interrupt = 0; + + hpet_rtc_timer_reinit(); + + if (UIE_on | AIE_on) { + rtc_get_rtc_time(&curr_time); + } + if (UIE_on) { + if (curr_time.tm_sec != prev_update_sec) { + /* Set update int info, call real rtc int routine */ + call_rtc_interrupt = 1; + rtc_int_flag = RTC_UF; + prev_update_sec = curr_time.tm_sec; + } + } + if (PIE_on) { + PIE_count++; + if (PIE_count >= hpet_rtc_int_freq/PIE_freq) { + /* Set periodic int info, call real rtc int routine */ + call_rtc_interrupt = 1; + rtc_int_flag |= RTC_PF; + PIE_count = 0; + } + } + if (AIE_on) { + if ((curr_time.tm_sec == alarm_time.tm_sec) && + (curr_time.tm_min == alarm_time.tm_min) && + (curr_time.tm_hour == alarm_time.tm_hour)) { + /* Set alarm int info, call real rtc int routine */ + call_rtc_interrupt = 1; + rtc_int_flag |= RTC_AF; + } + } + if (call_rtc_interrupt) { + rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); + rtc_interrupt(rtc_int_flag, dev_id, regs); + } + return IRQ_HANDLED; +} +#endif --- linux-2.6.2-rc1/arch/x86_64/kernel/traps.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/traps.c 2004-01-21 23:27:00.000000000 -0800 @@ -45,6 +45,9 @@ #include #include +#ifdef CONFIG_KGDB +#include +#endif extern struct gate_struct idt_table[256]; @@ -553,6 +556,8 @@ asmlinkage void default_do_nmi(struct pt unsigned char reason = inb(0x61); if (!(reason & 0xc0)) { + if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT) == NOTIFY_BAD) + return; #ifdef CONFIG_X86_LOCAL_APIC /* * Ok, so this is none of the documented NMI sources, --- linux-2.6.2-rc1/arch/x86_64/kernel/vmlinux.lds.S 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/x86_64/kernel/vmlinux.lds.S 2004-01-21 23:27:00.000000000 -0800 @@ -3,6 +3,7 @@ */ #include +#include OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") OUTPUT_ARCH(i386:x86-64) @@ -137,7 +138,9 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) +#ifndef CONFIG_KGDB *(.eh_frame) +#endif } /* DWARF 2 */ --- linux-2.6.2-rc1/arch/x86_64/kernel/x8664_ksyms.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/x86_64/kernel/x8664_ksyms.c 2004-01-21 23:30:33.000000000 -0800 @@ -155,7 +155,7 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback); extern void * memset(void *,int,__kernel_size_t); extern size_t strlen(const char *); -extern void bcopy(const char * src, char * dest, int count); +extern void bcopy(const void * src, void * dest, size_t count); extern void * memmove(void * dest,const void *src,size_t count); extern char * strcpy(char * dest,const char *src); extern int strcmp(const char * cs,const char * ct); --- linux-2.6.2-rc1/arch/x86_64/lib/csum-partial.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/x86_64/lib/csum-partial.c 2004-01-21 23:30:33.000000000 -0800 @@ -56,6 +56,8 @@ static __force_inline unsigned do_csum(c } count >>= 1; /* nr of 32-bit words.. */ if (count) { + unsigned long zero; + unsigned count64; if (4 & (unsigned long) buff) { result += *(unsigned int *) buff; count--; @@ -65,8 +67,8 @@ static __force_inline unsigned do_csum(c count >>= 1; /* nr of 64-bit words.. */ /* main loop using 64byte blocks */ - unsigned long zero = 0; - unsigned count64 = count >> 3; + zero = 0; + count64 = count >> 3; while (count64) { asm("addq 0*8(%[src]),%[res]\n\t" "adcq 1*8(%[src]),%[res]\n\t" --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/arch/x86_64/lib/kgdb_serial.c 2004-01-21 23:27:00.000000000 -0800 @@ -0,0 +1,490 @@ +/* + * Serial interface GDB stub + * + * Written (hacked together) by David Grothe (dave@gcom.com) + * Modified to allow invokation early in boot see also + * kgdb.h for instructions by George Anzinger(george@mvista.com) + * Modified to handle debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_KGDB_USER_CONSOLE +extern void kgdb_console_finit(void); +#endif +#define PRNT_off +#define TEST_EXISTANCE +#ifdef PRNT +#define dbprintk(s) printk s +#else +#define dbprintk(s) +#endif +#define TEST_INTERRUPT_off +#ifdef TEST_INTERRUPT +#define intprintk(s) printk s +#else +#define intprintk(s) +#endif + +#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) + +#define GDB_BUF_SIZE 512 /* power of 2, please */ + +static char gdb_buf[GDB_BUF_SIZE]; +static int gdb_buf_in_inx; +static atomic_t gdb_buf_in_cnt; +static int gdb_buf_out_inx; + +struct async_struct *gdb_async_info; +static int gdb_async_irq; + +#define outb_px(a,b) outb_p(b,a) + +static void program_uart(struct async_struct *info); +static void write_char(struct async_struct *info, int chr); +/* + * Get a byte from the hardware data buffer and return it + */ +static int +read_data_bfr(struct async_struct *info) +{ + char it = inb_p(info->port + UART_LSR); + + if (it & UART_LSR_DR) + return (inb_p(info->port + UART_RX)); + /* + * If we have a framing error assume somebody messed with + * our uart. Reprogram it and send '-' both ways... + */ + if (it & 0xc) { + program_uart(info); + write_char(info, '-'); + return ('-'); + } + return (-1); + +} /* read_data_bfr */ + +/* + * Get a char if available, return -1 if nothing available. + * Empty the receive buffer first, then look at the interface hardware. + + * Locking here is a bit of a problem. We MUST not lock out communication + * if we are trying to talk to gdb about a kgdb entry. ON the other hand + * we can loose chars in the console pass thru if we don't lock. It is also + * possible that we could hold the lock or be waiting for it when kgdb + * NEEDS to talk. Since kgdb locks down the world, it does not need locks. + * We do, of course have possible issues with interrupting a uart operation, + * but we will just depend on the uart status to help keep that straight. + + */ +static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; +#ifdef CONFIG_SMP +extern spinlock_t kgdb_spinlock; +#endif + +static int +read_char(struct async_struct *info) +{ + int chr; + unsigned long flags; + local_irq_save(flags); +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_lock(&uart_interrupt_lock); + } +#endif + if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ + chr = gdb_buf[gdb_buf_out_inx++]; + gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); + atomic_dec(&gdb_buf_in_cnt); + } else { + chr = read_data_bfr(info); + } +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_unlock(&uart_interrupt_lock); + } +#endif + local_irq_restore(flags); + return (chr); +} + +/* + * Wait until the interface can accept a char, then write it. + */ +static void +write_char(struct async_struct *info, int chr) +{ + while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; + + outb_p(chr, info->port + UART_TX); + +} /* write_char */ + +/* + * Mostly we don't need a spinlock, but since the console goes + * thru here with interrutps on, well, we need to catch those + * chars. + */ +/* + * This is the receiver interrupt routine for the GDB stub. + * It will receive a limited number of characters of input + * from the gdb host machine and save them up in a buffer. + * + * When the gdb stub routine tty_getDebugChar() is called it + * draws characters out of the buffer until it is empty and + * then reads directly from the serial port. + * + * We do not attempt to write chars from the interrupt routine + * since the stubs do all of that via tty_putDebugChar() which + * writes one byte after waiting for the interface to become + * ready. + * + * The debug stubs like to run with interrupts disabled since, + * after all, they run as a consequence of a breakpoint in + * the kernel. + * + * Perhaps someone who knows more about the tty driver than I + * care to learn can make this work for any low level serial + * driver. + */ +static irqreturn_t +gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct async_struct *info; + unsigned long flags; + + info = gdb_async_info; + if (!info || !info->tty || irq != gdb_async_irq) + return IRQ_NONE; + + local_irq_save(flags); + spin_lock(&uart_interrupt_lock); + do { + int chr = read_data_bfr(info); + intprintk(("Debug char on int: %x hex\n", chr)); + if (chr < 0) + continue; + + if (chr == 3) { /* Ctrl-C means remote interrupt */ + BREAKPOINT; + continue; + } + + if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { + /* buffer overflow tosses early char */ + read_char(info); + } + gdb_buf[gdb_buf_in_inx++] = chr; + gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); + } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); + spin_unlock(&uart_interrupt_lock); + local_irq_restore(flags); + return IRQ_HANDLED; +} /* gdb_interrupt */ + +/* + * Just a NULL routine for testing. + */ +void +gdb_null(void) +{ +} /* gdb_null */ + +/* These structure are filled in with values defined in asm/kgdb_local.h + */ +static struct serial_state state = SB_STATE; +static struct async_struct local_info = SB_INFO; +static int ok_to_enable_ints = 0; +static void kgdb_enable_ints_now(void); + +extern char *kgdb_version; +/* + * Hook an IRQ for KGDB. + * + * This routine is called from tty_putDebugChar, below. + */ +static int ints_disabled = 1; +int +gdb_hook_interrupt(struct async_struct *info, int verb) +{ + struct serial_state *state = info->state; + unsigned long flags; + int port; +#ifdef TEST_EXISTANCE + int scratch, scratch2; +#endif + + /* The above fails if memory managment is not set up yet. + * Rather than fail the set up, just keep track of the fact + * and pick up the interrupt thing later. + */ + gdb_async_info = info; + port = gdb_async_info->port; + gdb_async_irq = state->irq; + if (verb) { + printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", + kgdb_version, + port, + gdb_async_irq, gdb_async_info->state->custom_divisor); + } + local_irq_save(flags); +#ifdef TEST_EXISTANCE + /* Existance test */ + /* Should not need all this, but just in case.... */ + + scratch = inb_p(port + UART_IER); + outb_px(port + UART_IER, 0); + outb_px(0xff, 0x080); + scratch2 = inb_p(port + UART_IER); + outb_px(port + UART_IER, scratch); + if (scratch2) { + printk + ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); + local_irq_restore(flags); + return 1; /* We failed; there's nothing here */ + } + scratch2 = inb_p(port + UART_LCR); + outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ + outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ + outb_px(port + UART_LCR, 0); + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); + scratch = inb_p(port + UART_IIR) >> 6; + if (scratch == 1) { + printk("gdb_hook_interrupt: Undefined UART type!" + " Not a UART! \n"); + local_irq_restore(flags); + return 1; + } else { + dbprintk(("gdb_hook_interrupt: UART type " + "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); + } + scratch = inb_p(port + UART_MCR); + outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); + outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); + scratch2 = inb_p(port + UART_MSR) & 0xF0; + outb_px(port + UART_MCR, scratch); + if (scratch2 != 0x90) { + printk("gdb_hook_interrupt: " + "Loop back test failed! Not a UART!\n"); + local_irq_restore(flags); + return scratch2 + 1000; /* force 0 to fail */ + } +#endif /* test existance */ + program_uart(info); + local_irq_restore(flags); + + return (0); + +} /* gdb_hook_interrupt */ + +static void +program_uart(struct async_struct *info) +{ + int port = info->port; + + (void) inb_p(port + UART_RX); + outb_px(port + UART_IER, 0); + + (void) inb_p(port + UART_RX); /* serial driver comments say */ + (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ + (void) inb_p(port + UART_MSR); + outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); + outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ + outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ + outb_px(port + UART_MCR, info->MCR); + + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ + outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + return; +} + +/* + * tty_getDebugChar + * + * This is a GDB stub routine. It waits for a character from the + * serial interface and then returns it. If there is no serial + * interface connection then it returns a bogus value which will + * almost certainly cause the system to hang. In the + */ +int kgdb_in_isr = 0; +int kgdb_in_lsr = 0; +extern spinlock_t kgdb_spinlock; + +/* Caller takes needed protections */ + +int +tty_getDebugChar(void) +{ + volatile int chr, dum, time, end_time; + + dbprintk(("tty_getDebugChar(port %x): ", gdb_async_info->port)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + /* + * This trick says if we wait a very long time and get + * no char, return the -1 and let the upper level deal + * with it. + */ + rdtsc(dum, time); + end_time = time + 2; + while (((chr = read_char(gdb_async_info)) == -1) && + (end_time - time) > 0) { + rdtsc(dum, time); + }; + /* + * This covers our butts if some other code messes with + * our uart, hay, it happens :o) + */ + if (chr == -1) + program_uart(gdb_async_info); + + dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); + return (chr); + +} /* tty_getDebugChar */ + +static int count = 3; +static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; + +static int __init +kgdb_enable_ints(void) +{ + set_debug_traps(); + if (kgdboe) { + return 0; + } + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 1); + } + ok_to_enable_ints = 1; + kgdb_enable_ints_now(); +#ifdef CONFIG_KGDB_USER_CONSOLE + kgdb_console_finit(); +#endif + return 0; +} + +#ifdef CONFIG_SERIAL_8250 +void shutdown_for_kgdb(struct async_struct *gdb_async_info); +#endif + +#define kgdb_mem_init_done() (1) + +static void +kgdb_enable_ints_now(void) +{ + if (!spin_trylock(&one_at_atime)) + return; + if (!ints_disabled) + goto exit; + if (kgdb_mem_init_done() && + ints_disabled) { /* don't try till mem init */ +#ifdef CONFIG_SERIAL_8250 + /* + * The ifdef here allows the system to be configured + * without the serial driver. + * Don't make it a module, however, it will steal the port + */ + shutdown_for_kgdb(gdb_async_info); +#endif + ints_disabled = request_irq(gdb_async_info->state->irq, + gdb_interrupt, + IRQ_T(gdb_async_info), + "KGDB-stub", NULL); + intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); + } + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + exit: + spin_unlock(&one_at_atime); +} + +/* + * tty_putDebugChar + * + * This is a GDB stub routine. It waits until the interface is ready + * to transmit a char and then sends it. If there is no serial + * interface connection then it simply returns to its caller, having + * pretended to send the char. Caller takes needed protections. + */ +void +tty_putDebugChar(int chr) +{ + dbprintk(("tty_putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", + gdb_async_info->port, + chr, + chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + + write_char(gdb_async_info, chr); /* this routine will wait */ + count = (chr == '#') ? 0 : count + 1; + if ((count == 2)) { /* try to enable after */ + if (ints_disabled & ok_to_enable_ints) + kgdb_enable_ints_now(); /* try to enable after */ + + /* We do this a lot because, well we really want to get these + * interrupts. The serial driver will clear these bits when it + * initializes the chip. Every thing else it does is ok, + * but this. + */ + if (!ints_disabled) { + outb_px(gdb_async_info->port + UART_IER, + gdb_async_info->IER); + } + } + +} /* tty_putDebugChar */ + +/* + * This does nothing for the serial port, since it doesn't buffer. + */ + +void tty_flushDebugChar(void) +{ +} + +module_init(kgdb_enable_ints); --- linux-2.6.2-rc1/arch/x86_64/lib/Makefile 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/x86_64/lib/Makefile 2004-01-21 23:27:00.000000000 -0800 @@ -11,3 +11,4 @@ lib-y += memcpy.o memmove.o memset.o cop lib-$(CONFIG_IO_DEBUG) += iodebug.o lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +lib-$(CONFIG_KGDB) += kgdb_serial.o --- linux-2.6.2-rc1/arch/x86_64/lib/thunk.S 2003-06-14 12:18:09.000000000 -0700 +++ 25/arch/x86_64/lib/thunk.S 2004-01-21 23:27:00.000000000 -0800 @@ -8,6 +8,7 @@ #include #include + #include #include #include @@ -15,18 +16,22 @@ .macro thunk name,func .globl \name \name: + CFI_STARTPROC SAVE_ARGS call \func jmp restore + CFI_ENDPROC .endm /* rdi: arg1 ... normal C conventions. rax is passed from C. */ .macro thunk_retrax name,func .globl \name \name: + CFI_STARTPROC SAVE_ARGS call \func jmp restore_norax + CFI_ENDPROC .endm @@ -43,13 +48,20 @@ thunk_retrax __down_failed_trylock,__down_trylock thunk __up_wakeup,__up + /* SAVE_ARGS below is used only for the .cfi directives it contains. */ + CFI_STARTPROC + SAVE_ARGS restore: RESTORE_ARGS ret + CFI_ENDPROC + CFI_STARTPROC + SAVE_ARGS restore_norax: RESTORE_ARGS 1 ret + CFI_ENDPROC #ifdef CONFIG_SMP /* Support for read/write spinlocks. */ --- linux-2.6.2-rc1/arch/x86_64/lib/usercopy.c 2003-09-08 13:58:56.000000000 -0700 +++ 25/arch/x86_64/lib/usercopy.c 2004-01-21 23:30:33.000000000 -0800 @@ -88,7 +88,7 @@ unsigned long __clear_user(void *addr, u " .quad 1b,2b\n" ".previous" : [size8] "=c"(size), [dst] "=&D" (__d0) - : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst] "(addr), + : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), [zero] "r" (0UL), [eight] "r" (8UL)); return size; } --- linux-2.6.2-rc1/arch/x86_64/Makefile 2003-09-27 18:57:44.000000000 -0700 +++ 25/arch/x86_64/Makefile 2004-01-21 23:27:00.000000000 -0800 @@ -49,11 +49,15 @@ CFLAGS += -fno-reorder-blocks # CFLAGS += -finline-limit=2000 CFLAGS += -Wno-sign-compare # don't enable this when you use kgdb: -ifneq ($(CONFIG_X86_REMOTE_DEBUG),y) +ifneq ($(CONFIG_KGDB),y) CFLAGS += -fno-asynchronous-unwind-tables endif #CFLAGS += $(call check_gcc,-funit-at-a-time,) +mflags-$(CONFIG_KGDB) += -gdwarf-2 +mflags-$(CONFIG_KGDB_MORE) += $(shell echo $(CONFIG_KGDB_OPTIONS) | sed -e 's/"//g') +CFLAGS += $(mflags-y) + head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o libs-y += arch/x86_64/lib/ --- linux-2.6.2-rc1/arch/x86_64/mm/fault.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/mm/fault.c 2004-01-21 23:30:33.000000000 -0800 @@ -141,6 +141,10 @@ static int bad_address(void *p) void dump_pagetable(unsigned long address) { pml4_t *pml4; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + asm("movq %%cr3,%0" : "=r" (pml4)); pml4 = __va((unsigned long)pml4 & PHYSICAL_PAGE_MASK); @@ -149,17 +153,17 @@ void dump_pagetable(unsigned long addres if (bad_address(pml4)) goto bad; if (!pml4_present(*pml4)) goto ret; - pgd_t *pgd = __pgd_offset_k((pgd_t *)pml4_page(*pml4), address); + pgd = __pgd_offset_k((pgd_t *)pml4_page(*pml4), address); if (bad_address(pgd)) goto bad; printk("PGD %lx ", pgd_val(*pgd)); if (!pgd_present(*pgd)) goto ret; - pmd_t *pmd = pmd_offset(pgd, address); + pmd = pmd_offset(pgd, address); if (bad_address(pmd)) goto bad; printk("PMD %lx ", pmd_val(*pmd)); if (!pmd_present(*pmd)) goto ret; - pte_t *pte = pte_offset_kernel(pmd, address); + pte = pte_offset_kernel(pmd, address); if (bad_address(pte)) goto bad; printk("PTE %lx", pte_val(*pte)); ret: --- linux-2.6.2-rc1/arch/x86_64/mm/k8topology.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/x86_64/mm/k8topology.c 2004-01-21 23:30:33.000000000 -0800 @@ -48,6 +48,7 @@ int __init k8_scan_nodes(unsigned long s int nodeid, i, nb; int found = 0; int nmax; + int rr; nb = find_northbridge(); if (nb < 0) @@ -153,7 +154,7 @@ int __init k8_scan_nodes(unsigned long s mapping. To avoid this fill in the mapping for all possible CPUs, as the number of CPUs is not known yet. We round robin the existing nodes. */ - int rr = 0; + rr = 0; for (i = 0; i < MAXNODE; i++) { if (nodes_present & (1UL< The functions are named readb, readw, readl, - readq, writeb, + readq, readb_relaxed, + readw_relaxed, readl_relaxed, + readq_relaxed, writeb, writew, writel and writeq. @@ -159,6 +161,18 @@ author cares. This kind of property cannot be hidden from driver writers in the API. + + + PCI ordering rules also guarantee that PIO read responses arrive + after any outstanding DMA writes on that bus, since for some devices + the result of a readb call may signal to the + driver that a DMA transaction is complete. In many cases, however, + the driver may want to indicate that the next + readb call has no relation to any previous DMA + writes performed by the device. The driver can use + readb_relaxed for these cases, although only + some platforms will honor the relaxed semantics. + --- linux-2.6.2-rc1/Documentation/fb/intel810.txt 2003-06-14 12:18:25.000000000 -0700 +++ 25/Documentation/fb/intel810.txt 2004-01-21 23:30:41.000000000 -0800 @@ -194,7 +194,7 @@ Using the same setup as described above, modprobe i810fb vram=2 xres=1024 bpp=8 hsync1=30 hsync2=55 vsync1=50 \ vsync2=85 accel=1 mtrr=1 -Or just add the following to /etc/modules.conf +Or just add the following to /etc/modprobe.conf options i810fb vram=2 xres=1024 bpp=16 hsync1=30 hsync2=55 vsync1=50 \ vsync2=85 accel=1 mtrr=1 --- linux-2.6.2-rc1/Documentation/filesystems/proc.txt 2003-11-09 16:45:05.000000000 -0800 +++ 25/Documentation/filesystems/proc.txt 2004-01-22 00:01:20.000000000 -0800 @@ -900,6 +900,15 @@ super-nr shows the number of currently a Every mounted file system needs a super block, so if you plan to mount lots of file systems, you may want to increase these numbers. +aio-nr and aio-max-nr +--------------------- + +aio-nr is the running total of the number of events specified on the +io_setup system call for all currently active aio contexts. If aio-nr +reaches aio-max-nr then io_setup will fail with EAGAIN. Note that +raising aio-max-nr does not result in the pre-allocation or re-sizing +of any kernel data structures. + 2.2 /proc/sys/fs/binfmt_misc - Miscellaneous binary formats ----------------------------------------------------------- --- linux-2.6.2-rc1/Documentation/ftape.txt 2003-10-08 15:07:08.000000000 -0700 +++ 25/Documentation/ftape.txt 2004-01-21 23:30:41.000000000 -0800 @@ -242,15 +242,15 @@ C. Boot and load time configuration Module parameters can be specified either directly when invoking the program 'insmod' at the shell prompt: - insmod ftape.o ft_tracing=4 + modprobe ftape ft_tracing=4 - or by editing the file `/etc/modules.conf' in which case they take + or by editing the file `/etc/modprobe.conf' in which case they take effect each time when the module is loaded with `modprobe' (please refer to the respective manual pages). Thus, you should add a line options ftape ft_tracing=4 - to `/etc/modules.conf` if you intend to increase the debugging + to `/etc/modprobe.conf` if you intend to increase the debugging output of the driver. @@ -298,7 +298,7 @@ C. Boot and load time configuration 5. Example module parameter setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To do the same, but with ftape compiled as a loadable kernel - module, add the following line to `/etc/modules.conf': + module, add the following line to `/etc/modprobe.conf': options ftape ft_probe_fc10=1 ft_tracing=4 --- linux-2.6.2-rc1/Documentation/hayes-esp.txt 2003-06-14 12:17:57.000000000 -0700 +++ 25/Documentation/hayes-esp.txt 2004-01-21 23:30:41.000000000 -0800 @@ -109,7 +109,7 @@ option with a space. For example: insmod esp dma=3 trigger=512 The esp module can be automatically loaded when needed. To cause this to -happen, add the following lines to /etc/modules.conf (replacing the last line +happen, add the following lines to /etc/modprobe.conf (replacing the last line with options for your configuration): alias char-major-57 esp --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/andthen 2004-01-21 23:26:58.000000000 -0800 @@ -0,0 +1,100 @@ + +define set_andthen + set var $thp=0 + set var $thp=(struct kgdb_and_then_struct *)&kgdb_data[0] + set var $at_size = (sizeof kgdb_data)/(sizeof *$thp) + set var $at_oc=kgdb_and_then_count + set var $at_cc=$at_oc +end + +define andthen_next + set var $at_cc=$arg0 +end + +define andthen + andthen_set_edge + if ($at_cc >= $at_oc) + printf "Outside window. Window size is %d\n",($at_oc-$at_low) + else + printf "%d: ",$at_cc + output *($thp+($at_cc++ % $at_size )) + printf "\n" + end +end +define andthen_set_edge + set var $at_oc=kgdb_and_then_count + set var $at_low = $at_oc - $at_size + if ($at_low < 0 ) + set var $at_low = 0 + end + if (( $at_cc > $at_oc) || ($at_cc < $at_low)) + printf "Count outside of window, setting count to " + if ($at_cc >= $at_oc) + set var $at_cc = $at_oc + else + set var $at_cc = $at_low + end + printf "%d\n",$at_cc + end +end + +define beforethat + andthen_set_edge + if ($at_cc <= $at_low) + printf "Outside window. Window size is %d\n",($at_oc-$at_low) + else + printf "%d: ",$at_cc-1 + output *($thp+(--$at_cc % $at_size )) + printf "\n" + end +end + +document andthen_next + andthen_next + . sets the number of the event to display next. If this event + . is not in the event pool, either andthen or beforethat will + . correct it to the nearest event pool edge. The event pool + . ends at the last event recorded and begins + . prior to that. If beforethat is used next, it will display + . event -1. +. + andthen commands are: set_andthen, andthen_next, andthen and beforethat +end + + +document andthen + andthen +. displays the next event in the list. sets up to display +. the oldest saved event first. +. (optional) count of the event to display. +. note the number of events saved is specified at configure time. +. if events are saved between calls to andthen the index will change +. but the displayed event will be the next one (unless the event buffer +. is overrun). +. +. andthen commands are: set_andthen, andthen_next, andthen and beforethat +end + +document set_andthen + set_andthen +. sets up to use the and commands. +. if you have defined your own struct, use the above and +. then enter the following: +. p $thp=(struct kgdb_and_then_structX *)&kgdb_data[0] +. where is the name of your structure. +. +. andthen commands are: set_andthen, andthen_next, andthen and beforethat +end + +document beforethat + beforethat +. displays the next prior event in the list. sets up to +. display the last occuring event first. +. +. note the number of events saved is specified at configure time. +. if events are saved between calls to beforethat the index will change +. but the displayed event will be the next one (unless the event buffer +. is overrun). +. +. andthen commands are: set_andthen, andthen_next, andthen and beforethat +end --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/debug-nmi.txt 2004-01-21 23:26:58.000000000 -0800 @@ -0,0 +1,37 @@ +Subject: Debugging with NMI +Date: Mon, 12 Jul 1999 11:28:31 -0500 +From: David Grothe +Organization: Gcom, Inc +To: David Grothe + +Kernel hackers: + +Maybe this is old hat, but it is new to me -- + +On an ISA bus machine, if you short out the A1 and B1 pins of an ISA +slot you will generate an NMI to the CPU. This interrupts even a +machine that is hung in a loop with interrupts disabled. Used in +conjunction with kgdb < +ftp://ftp.gcom.com/pub/linux/src/kgdb-2.3.35/kgdb-2.3.35.tgz > you can +gain debugger control of a machine that is hung in the kernel! Even +without kgdb the kernel will print a stack trace so you can find out +where it was hung. + +The A1/B1 pins are directly opposite one another and the farthest pins +towards the bracket end of the ISA bus socket. You can stick a paper +clip or multi-meter probe between them to short them out. + +I had a spare ISA bus to PC104 bus adapter around. The PC104 end of the +board consists of two rows of wire wrap pins. So I wired a push button +between the A1/B1 pins and now have an ISA board that I can stick into +any ISA bus slot for debugger entry. + +Microsoft has a circuit diagram of a PCI card at +http://www.microsoft.com/hwdev/DEBUGGING/DMPSW.HTM. If you want to +build one you will have to mail them and ask for the PAL equations. +Nobody makes one comercially. + +[THIS TIP COMES WITH NO WARRANTY WHATSOEVER. It works for me, but if +your machine catches fire, it is your problem, not mine.] + +-- Dave (the kgdb guy) --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdb-globals.txt 2004-01-21 23:26:58.000000000 -0800 @@ -0,0 +1,71 @@ +Sender: akale@veritas.com +Date: Fri, 23 Jun 2000 19:26:35 +0530 +From: "Amit S. Kale" +Organization: Veritas Software (India) +To: Dave Grothe , linux-kernel@vger.rutgers.edu +CC: David Milburn , + "Edouard G. Parmelan" , + ezannoni@cygnus.com, Keith Owens +Subject: Re: Module debugging using kgdb + +Dave Grothe wrote: +> +> Amit: +> +> There is a 2.4.0 version of kgdb on our ftp site: +> ftp://ftp.gcom.com/pub/linux/src/kgdb. I mirrored your version of gdb +> and loadmodule.sh there. +> +> Have a look at the README file and see if I go it right. If not, send +> me some corrections and I will update it. +> +> Does your version of gdb solve the global variable problem? + +Yes. +Thanks to Elena Zanoni, gdb (developement version) can now calculate +correctly addresses of dynamically loaded object files. I have not been +following gdb developement for sometime and am not sure when symbol +address calculation fix is going to appear in a gdb stable version. + +Elena, any idea when the fix will make it to a prebuilt gdb from a +redhat release? + +For the time being I have built a gdb developement version. It can be +used for module debugging with loadmodule.sh script. + +The problem with calculating of module addresses with previous versions +of gdb was as follows: +gdb did not use base address of a section while calculating address of +a symbol in the section in an object file loaded via 'add-symbol-file'. +It used address of .text segment instead. Due to this addresses of +symbols in .data, .bss etc. (e.g. global variables) were calculated incorrectly. + +Above mentioned fix allow gdb to use base address of a segment while +calculating address of a symbol in it. It adds a parameter '-s' to +'add-symbol-file' command for specifying base address of a segment. + +loadmodule.sh script works as follows. + +1. Copy a module file to target machine. +2. Load the module on the target machine using insmod with -m parameter. +insmod produces a module load map which contains base addresses of all +sections in the module and addresses of symbols in the module file. +3. Find all sections and their base addresses in the module from +the module map. +4. Generate a script that loads the module file. The script uses +'add-symbol-file' and specifies address of text segment followed by +addresses of all segments in the module. + +Here is an example gdb script produced by loadmodule.sh script. + +add-symbol-file foo 0xd082c060 -s .text.lock 0xd08cbfb5 +-s .fixup 0xd08cfbdf -s .rodata 0xd08cfde0 -s __ex_table 0xd08e3b38 +-s .data 0xd08e3d00 -s .bss 0xd08ec8c0 -s __ksymtab 0xd08ee838 + +With this command gdb can calculate addresses of symbols in ANY segment +in a module file. + +Regards. +-- +Amit Kale +Veritas Software ( http://www.veritas.com ) --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdbinit 2004-01-21 23:26:58.000000000 -0800 @@ -0,0 +1,14 @@ +shell echo -e "\003" >/dev/ttyS0 +set remotebaud 38400 +target remote /dev/ttyS0 +define si +stepi +printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx +printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp +x/i $eip +end +define ni +nexti +printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx +printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp +x/i $eip --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdbinit.hw 2004-01-21 23:26:58.000000000 -0800 @@ -0,0 +1,117 @@ + +#Using ia-32 hardware breakpoints. +# +#4 hardware breakpoints are available in ia-32 processors. These breakpoints +#do not need code modification. They are set using debug registers. +# +#Each hardware breakpoint can be of one of the +#three types: execution, write, access. +#1. An Execution breakpoint is triggered when code at the breakpoint address is +#executed. +#2. A write breakpoint ( aka watchpoints ) is triggered when memory location +#at the breakpoint address is written. +#3. An access breakpoint is triggered when memory location at the breakpoint +#address is either read or written. +# +#As hardware breakpoints are available in limited number, use software +#breakpoints ( br command in gdb ) instead of execution hardware breakpoints. +# +#Length of an access or a write breakpoint defines length of the datatype to +#be watched. Length is 1 for char, 2 short , 3 int. +# +#For placing execution, write and access breakpoints, use commands +#hwebrk, hwwbrk, hwabrk +#To remove a breakpoint use hwrmbrk command. +# +#These commands take following types of arguments. For arguments associated +#with each command, use help command. +#1. breakpointno: 0 to 3 +#2. length: 1 to 3 +#3. address: Memory location in hex ( without 0x ) e.g c015e9bc +# +#Use the command exinfo to find which hardware breakpoint occured. + +#hwebrk breakpointno address +define hwebrk + maintenance packet Y$arg0,0,0,$arg1 +end +document hwebrk + hwebrk
+ Places a hardware execution breakpoint + = 0 - 3 +
= Hex digits without leading "0x". +end + +#hwwbrk breakpointno length address +define hwwbrk + maintenance packet Y$arg0,1,$arg1,$arg2 +end +document hwwbrk + hwwbrk
+ Places a hardware write breakpoint + = 0 - 3 + = 1 (1 byte), 2 (2 byte), 3 (4 byte) +
= Hex digits without leading "0x". +end + +#hwabrk breakpointno length address +define hwabrk + maintenance packet Y$arg0,1,$arg1,$arg2 +end +document hwabrk + hwabrk
+ Places a hardware access breakpoint + = 0 - 3 + = 1 (1 byte), 2 (2 byte), 3 (4 byte) +
= Hex digits without leading "0x". +end + +#hwrmbrk breakpointno +define hwrmbrk + maintenance packet y$arg0 +end +document hwrmbrk + hwrmbrk + = 0 - 3 + Removes a hardware breakpoint +end + +define reboot + maintenance packet r +end +#exinfo +define exinfo + maintenance packet qE +end +document exinfo + exinfo + Gives information about a breakpoint. +end +define get_th + p $th=(struct thread_info *)((int)$esp & ~8191) +end +document get_th + get_tu + Gets and prints the current thread_info pointer, Defines th to be it. +end +define get_cu + p $cu=((struct thread_info *)((int)$esp & ~8191))->task +end +document get_cu + get_cu + Gets and print the "current" value. Defines $cu to be it. +end +define int_off + set var $flags=$eflags + set $eflags=$eflags&~0x200 + end +define int_on + set var $eflags|=$flags&0x200 + end +document int_off + saves the current interrupt state and clears the processor interrupt + flag. Use int_on to restore the saved flag. +end +document int_on + Restores the interrupt flag saved by int_off. +end --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdbinit-modules 2004-01-21 23:26:58.000000000 -0800 @@ -0,0 +1,146 @@ +# +# Usefull GDB user-command to debug Linux Kernel Modules with gdbstub. +# +# This don't work for Linux-2.0 or older. +# +# Author Edouard G. Parmelan +# +# +# Fri Apr 30 20:33:29 CEST 1999 +# First public release. +# +# Major cleanup after experiment Linux-2.0 kernel without success. +# Symbols of a module are not in the correct order, I can't explain +# why :( +# +# Fri Mar 19 15:41:40 CET 1999 +# Initial version. +# +# Thu Jan 6 16:29:03 CST 2000 +# A little fixing by Dave Grothe +# +# Mon Jun 19 09:33:13 CDT 2000 +# Alignment changes from Edouard Parmelan +# +# The basic idea is to find where insmod load the module and inform +# GDB to load the symbol table of the module with the GDB command +# ``add-symbol-file
''. +# +# The Linux kernel holds the list of all loaded modules in module_list, +# this list end with &kernel_module (exactly with module->next == NULL, +# but the last module is not a real module). +# +# Insmod allocates the struct module before the object file. Since +# Linux-2.1, this structure contain his size. The real address of +# the object file is then (char*)module + module->size_of_struct. +# +# You can use three user functions ``mod-list'', ``mod-print-symbols'' +# and ``add-module-symbols''. +# +# mod-list list all loaded modules with the format: +# +# +# As soon as you have found the address of your module, you can +# print its exported symbols (mod-print-symbols) or inform GDB to add +# symbols from your module file (mod-add-symbols). +# +# The argument that you give to mod-print-symbols or mod-add-symbols +# is the from the mod-list command. +# +# When using the mod-add-symbols command you must also give the full +# pathname of the modules object code file. +# +# The command mod-add-lis is an example of how to make this easier. +# You can edit this macro to contain the path name of your own +# favorite module and then use it as a shorthand to load it. You +# still need the module-address, however. +# +# The internal function ``mod-validate'' set the GDB variable $mod +# as a ``struct module*'' if the kernel known the module otherwise +# $mod is set to NULL. This ensure to not add symbols for a wrong +# address. +# +# Have a nice hacking day ! +# +# +define mod-list + set $mod = (struct module*)module_list + # the last module is the kernel, ignore it + while $mod != &kernel_module + printf "%p\t%s\n", (long)$mod, ($mod)->name + set $mod = $mod->next + end +end +document mod-list +List all modules in the form: +Use the as the argument for the other +mod-commands: mod-print-symbols, mod-add-symbols. +end + +define mod-validate + set $mod = (struct module*)module_list + while ($mod != $arg0) && ($mod != &kernel_module) + set $mod = $mod->next + end + if $mod == &kernel_module + set $mod = 0 + printf "%p is not a module\n", $arg0 + end +end +document mod-validate +mod-validate +Internal user-command used to validate the module parameter. +If is a real loaded module, set $mod to it otherwise set $mod to 0. +end + + +define mod-print-symbols + mod-validate $arg0 + if $mod != 0 + set $i = 0 + while $i < $mod->nsyms + set $sym = $mod->syms[$i] + printf "%p\t%s\n", $sym->value, $sym->name + set $i = $i + 1 + end + end +end +document mod-print-symbols +mod-print-symbols +Print all exported symbols of the module. see mod-list +end + + +define mod-add-symbols-align + mod-validate $arg0 + if $mod != 0 + set $mod_base = ($mod->size_of_struct + (long)$mod) + if ($arg2 != 0) && (($mod_base & ($arg2 - 1)) != 0) + set $mod_base = ($mod_base | ($arg2 - 1)) + 1 + end + add-symbol-file $arg1 $mod_base + end +end +document mod-add-symbols-align +mod-add-symbols-align +Load the symbols table of the module from the object file where +first section aligment is . +To retreive alignment, use `objdump -h '. +end + +define mod-add-symbols + mod-add-symbols-align $arg0 $arg1 sizeof(long) +end +document mod-add-symbols +mod-add-symbols +Load the symbols table of the module from the object file. +Default alignment is 4. See mod-add-symbols-align. +end + +define mod-add-lis + mod-add-symbols-align $arg0 /usr/src/LiS/streams.o 16 +end +document mod-add-lis +mod-add-lis +Does mod-add-symbols /usr/src/LiS/streams.o +end --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/kgdbeth.txt 2004-01-21 23:26:59.000000000 -0800 @@ -0,0 +1,92 @@ +KGDB over ethernet +================== + +Authors +------- + +Robert Walsh (2.6 port) +wangdi (2.6 port) +Matt Mackall (netpoll api) +San Mehat (original 2.4 code) + + +Introduction +------------ + +KGDB supports debugging over ethernet (kgdboe) via polling of a given +network interface. Most cards should be supported automatically. +Debugging facilities are available as soon as the network driver and +kgdboe have initialized. Unfortunately, this is too late in the boot +process for debugging some issues, but works quite well for many +others. This should not interfere with normal network usage and +doesn't require a dedicated NIC. + +Terminology +----------- + +This document uses the following terms: + + TARGET: the machine being debugged. + HOST: the machine running gdb. + + +Usage +----- + +You need to use the following command-line option on the TARGET kernel: + + kgdboe=[tgt-port]@/[dev],[host-port]@/[host-macaddr] + + where + tgt-port source for UDP packets (defaults to 6443) + tgt-ip source IP to use (interface address) + dev network interface (eth0) + host-port HOST UDP port (6442) (not really used) + host-ip IP address for HOST machine + host-macaddr ethernet MAC address for HOST (ff:ff:ff:ff:ff:ff) + + examples: + + kgdboe=7000@192.168.0.1/eth1,7001@192.168.0.2/00:05:3C:04:47:5D + this machine is 192.168.0.1 on eth1 + remote machine is 192.168.0.2 with MAC address 00:05:3C:04:47:5D + listen for gdb packets on port 7000 + send unsolicited gdb packets to port 7001 + + kgdboe=@192.168.0.1/,@192.168.0.2/ + this machine is 192.168.0.1 on default interface eth0 + remote machine is 192.168.0.2, use default broadcast MAC address + listen for gdb packets on default port 6443 + send unsolicited gdb packets to port 6442 + +Only packets originating from the configured HOST IP address will be +accepted by the debugger. + +On the HOST side, run gdb as normal and use a remote UDP host as the +target: + + % gdb ./vmlinux + GNU gdb Red Hat Linux (5.3post-0.20021129.18rh) + Copyright 2003 Free Software Foundation, Inc. + GDB is free software, covered by the GNU General Public License, and you are + welcome to change it and/or distribute copies of it under certain conditions. + Type "show copying" to see the conditions. + There is absolutely no warranty for GDB. Type "show warranty" for details. + This GDB was configured as "i386-redhat-linux-gnu"... + (gdb) target remote udp:HOSTNAME:6443 + +You can now continue as if you were debugging over a serial line. + +Limitations +----------- + +The current release of this code is exclusive of using kgdb on a +serial interface, so you must boot without the kgdboe option to use +serial debugging. Trying to debug the network driver while using it +will prove interesting. + +Bug reports +----------- + +Send bug reports to Robert Walsh and Matt +Mackall . --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/kgdb.txt 2004-01-21 23:26:58.000000000 -0800 @@ -0,0 +1,775 @@ +Last edit: <20030806.1637.12> +This file has information specific to the i386 kgdb option. Other +platforms with the kgdb option may behave in a similar fashion. + +New features: +============ +20030806.1557.37 +This version was made against the 2.6.0-test2 kernel. We have made the +following changes: + +- The getthread() code in the stub calls find_task_by_pid(). It fails + if we are early in the bring up such that the pid arrays have yet to + be allocated. We have added a line to kernel/pid.c to make + "kgdb_pid_init_done" true once the arrays are allocated. This way the + getthread() code knows not to call. This is only used by the thread + debugging stuff and threads will not yet exist at this point in the + boot. + +- For some reason, gdb was not asking for a new thread list when the + "info thread" command was given. We changed to the newer version of + the thread info command and gdb now seems to ask when needed. Result, + we now get all threads in the thread list. + +- We now respond to the ThreadExtraInfo request from gdb with the thread + name from task_struct .comm. This then appears in the thread list. + Thoughts on additional options for this are welcome. Things such as + "has BKL" and "Preempted" come to mind. I think we could have a flag + word that could enable different bits of info here. + +- We now honor, sort of, the C and S commands. These are continue and + single set after delivering a signal. We ignore the signal and do the + requested action. This only happens when we told gdb that a signal + was the reason for entry, which is only done on memory faults. The + result is that you can now continue into the Oops. + +- We changed the -g to -gdwarf-2. This seems to be the same as -ggdb, + but it is more exact on what language to use. + +- We added two dwarf2 include files and a bit of code at the end of + entry.S. This does not yet work, so it is disabled. Still we want to + keep track of the code and "maybe" someone out there can fix it. + +- Randy Dunlap sent some fix ups for this file which are now merged. + +- Hugh Dickins sent a fix to a bit of code in traps.c that prevents a + compiler warning if CONFIG_KGDB is off (now who would do that :). + +- Andrew Morton sent a fix for the serial driver which is now merged. + +- Andrew also sent a change to the stub around the cpu managment code + which is also merged. + +- Andrew also sent a patch to make "f" as well as "g" work as SysRq + commands to enter kgdb, merged. + +- If CONFIG_KGDB and CONFIG_DEBUG_SPINLOCKS are both set we added a + "who" field to the spinlock data struct. This is filled with + "current" when ever the spinlock suceeds. Useful if you want to know + who has the lock. + +_ And last, but not least, we fixed the "get_cu" macro to properly get + the current value of "current". + +New features: +============ +20030505.1827.27 +We are starting to align with the sourceforge version, at least in +commands. To this end, the boot command string to start kgdb at +boot time has been changed from "kgdb" to "gdb". + +Andrew Morton sent a couple of patches which are now included as follows: +1.) We now return a flag to the interrupt handler. +2.) We no longer use smp_num_cpus (a conflict with the lock meter). +3.) And from William Lee Irwin III code to make + sure high-mem is set up before we attempt to register our interrupt + handler. +We now include asm/kgdb.h from config.h so you will most likely never +have to include it. It also 'NULLS' the kgdb macros you might have in +your code when CONFIG_KGDB is not defined. This allows you to just +turn off CONFIG_KGDB to turn off all the kgdb_ts() calls and such. +This include is conditioned on the machine being an x86 so as to not +mess with other archs. + +20020801.1129.03 +This is currently the version for the 2.4.18 (and beyond?) kernel. + +We have several new "features" beginning with this version: + +1.) Kgdb now syncs the "other" CPUs with a cross-CPU NMI. No more + waiting and it will pull that guy out of an IRQ off spin lock :) + +2.) We doctored up the code that tells where a task is waiting and + included it so that the "info thread" command will show a bit more + than "schedule()". Try it... + +3.) Added the ability to call a function from gdb. All the standard gdb + issues apply, i.e. if you hit a breakpoint in the function, you are + not allowed to call another (gdb limitation, not kgdb). To help + this capability we added a memory allocation function. Gdb does not + return this memory (it is used for strings that you pass to that function + you are calling from gdb) so we fixed up a way to allow you to + manually return the memory (see below). + +4.) Kgdb time stamps (kgdb_ts()) are enhanced to expand what was the + interrupt flag to now also include the preemption count and the + "in_interrupt" info. The flag is now called "with_pif" to indicate + the order, preempt_count, in_interrupt, flag. The preempt_count is + shifted left by 4 bits so you can read the count in hex by dropping + the low order digit. In_interrupt is in bit 1, and the flag is in + bit 0. + +5.) The command: "p kgdb_info" is now expanded and prints something + like: +(gdb) p kgdb_info +$2 = {used_malloc = 0, called_from = 0xc0107506, entry_tsc = 67468627259, + errcode = 0, vector = 3, print_debug_info = 0, hold_on_sstep = 1, + cpus_waiting = {{task = 0xc027a000, pid = 32768, hold = 0, + regs = 0xc027bf84}, {task = 0x0, pid = 0, hold = 0, regs = 0x0}}} + + Things to note here: a.) used_malloc is the amount of memory that + has been malloc'ed to do calls from gdb. You can reclaim this + memory like this: "p kgdb_info.used_malloc=0" Cool, huh? b.) + cpus_waiting is now "sized" by the number of CPUs you enter at + configure time in the kgdb configure section. This is NOT used + anywhere else in the system, but it is "nice" here. c.) The task's + "pid" is now in the structure. This is the pid you will need to use + to decode to the thread id to get gdb to look at that thread. + Remember that the "info thread" command prints a list of threads + wherein it numbers each thread with its reference number followed + by the thread's pid. Note that the per-CPU idle threads actually + have pids of 0 (yes, there is more than one pid 0 in an SMP system). + To avoid confusion, kgdb numbers these threads with numbers beyond + the MAX_PID. That is why you see 32768 and above. + +6.) A subtle change, we now provide the complete register set for tasks + that are active on the other CPUs. This allows better trace back on + those tasks. + + And, let's mention what we could not fix. Back-trace from all but the + thread that we trapped will, most likely, have a bogus entry in it. + The problem is that gdb does not recognize the entry code for + functions that use "current" near (at all?) the entry. The compiler + is putting the "current" decode as the first two instructions of the + function where gdb expects to find %ebp changing code. Back trace + also has trouble with interrupt frames. I am talking with Daniel + Jacobowitz about some way to fix this, but don't hold your breath. + +20011220.0050.35 +Major enhancement with this version is the ability to hold one or more +CPUs in an SMP system while allowing the others to continue. Also, by +default only the current CPU is enabled on single-step commands (please +note that gdb issues single-step commands at times other than when you +use the si command). + +Another change is to collect some useful information in +a global structure called "kgdb_info". You should be able to just: + +p kgdb_info + +although I have seen cases where the first time this is done gdb just +prints the first member but prints the whole structure if you then enter +CR (carriage return or enter). This also works: + +p *&kgdb_info + +Here is a sample: +(gdb) p kgdb_info +$4 = {called_from = 0xc010732c, entry_tsc = 32804123790856, errcode = 0, + vector = 3, print_debug_info = 0} + +"Called_from" is the return address from the current entry into kgdb. +Sometimes it is useful to know why you are in kgdb, for example, was +it an NMI or a real breakpoint? The simple way to interrogate this +return address is: + +l *0xc010732c + +which will print the surrounding few lines of source code. + +"Entry_tsc" is the CPU TSC on entry to kgdb (useful to compare to the +kgdb_ts entries). + +"errcode" and "vector" are other entry parameters which may be helpful on +some traps. + +"print_debug_info" is the internal debugging kgdb print enable flag. Yes, +you can modify it. + +In SMP systems kgdb_info also includes the "cpus_waiting" structure and +"hold_on_step": + +(gdb) p kgdb_info +$7 = {called_from = 0xc0112739, entry_tsc = 1034936624074, errcode = 0, + vector = 2, print_debug_info = 0, hold_on_sstep = 1, cpus_waiting = {{ + task = 0x0, hold = 0, regs = 0x0}, {task = 0xc71b8000, hold = 0, + regs = 0xc71b9f70}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}}} + +"Cpus_waiting" has an entry for each CPU other than the current one that +has been stopped. Each entry contains the task_struct address for that +CPU, the address of the regs for that task and a hold flag. All these +have the proper typing so that, for example: + +p *kgdb_info.cpus_waiting[1].regs + +will print the registers for CPU 1. + +"Hold_on_sstep" is a new feature with this version and comes up set or +true. What this means is that whenever kgdb is asked to single-step all +other CPUs are held (i.e. not allowed to execute). The flag applies to +all but the current CPU and, again, can be changed: + +p kgdb_info.hold_on_sstep=0 + +restores the old behavior of letting all CPUs run during single-stepping. + +Likewise, each CPU has a "hold" flag, which if set, locks that CPU out +of execution. Note that this has some risk in cases where the CPUs need +to communicate with each other. If kgdb finds no CPU available on exit, +it will push a message thru gdb and stay in kgdb. Note that it is legal +to hold the current CPU as long as at least one CPU can execute. + +20010621.1117.09 +This version implements an event queue. Events are signaled by calling +a function in the kgdb stub and may be examined from gdb. See EVENTS +below for details. This version also tightens up the interrupt and SMP +handling to not allow interrupts on the way to kgdb from a breakpoint +trap. It is fine to allow these interrupts for user code, but not +system debugging. + +Version +======= + +This version of the kgdb package was developed and tested on +kernel version 2.4.16. It will not install on any earlier kernels. +It is possible that it will continue to work on later versions +of 2.4 and then versions of 2.5 (I hope). + + +Debugging Setup +=============== + +Designate one machine as the "development" machine. This is the +machine on which you run your compiles and which has your source +code for the kernel. Designate a second machine as the "target" +machine. This is the machine that will run your experimental +kernel. + +The two machines will be connected together via a serial line out +one or the other of the COM ports of the PC. You will need the +appropriate modem eliminator (null modem) cable(s) for this. + +Decide on which tty port you want the machines to communicate, then +connect them up back-to-back using the null modem cable. COM1 is +/dev/ttyS0 and COM2 is /dev/ttyS1. You should test this connection +with the two machines prior to trying to debug a kernel. Once you +have it working, on the TARGET machine, enter: + +setserial /dev/ttyS0 (or what ever tty you are using) + +and record the port address and the IRQ number. + +On the DEVELOPMENT machine you need to apply the patch for the kgdb +hooks. You have probably already done that if you are reading this +file. + +On your DEVELOPMENT machine, go to your kernel source directory and do +"make Xconfig" where X is one of "x", "menu", or "". If you are +configuring in the standard serial driver, it must not be a module. +Either yes or no is ok, but making the serial driver a module means it +will initialize after kgdb has set up the UART interrupt code and may +cause a failure of the control-C option discussed below. The configure +question for the serial driver is under the "Character devices" heading +and is: + +"Standard/generic (8250/16550 and compatible UARTs) serial support" + +Go down to the kernel debugging menu item and open it up. Enable the +kernel kgdb stub code by selecting that item. You can also choose to +turn on the "-ggdb -O1" compile options. The -ggdb causes the compiler +to put more debug info (like local symbols) in the object file. On the +i386 -g and -ggdb are the same so this option just reduces to "O1". The +-O1 reduces the optimization level. This may be helpful in some cases, +be aware, however, that this may also mask the problem you are looking +for. + +The baud rate. Default is 115200. What ever you choose be sure that +the host machine is set to the same speed. I recommend the default. + +The port. This is the I/O address of the serial UART that you should +have gotten using setserial as described above. The standard COM1 port +(3f8) using IRQ 4 is default. COM2 is 2f8 which by convention uses IRQ +3. + +The port IRQ (see above). + +Stack overflow test. This option makes a minor change in the trap, +system call and interrupt code to detect stack overflow and transfer +control to kgdb if it happens. (Some platforms have this in the +baseline code, but the i386 does not.) + +You can also configure the system to recognize the boot option +"console=kgdb" which if given will cause all console output during +booting to be put thru gdb as well as other consoles. This option +requires that gdb and kgdb be connected prior to sending console output +so, if they are not, a breakpoint is executed to force the connection. +This will happen before any kernel output (it is going thru gdb, right), +and will stall the boot until the connection is made. + +You can also configure in a patch to SysRq to enable the kGdb SysRq. +This request generates a breakpoint. Since the serial port IRQ line is +set up after any serial drivers, it is possible that this command will +work when the control-C will not. + +Save and exit the Xconfig program. Then do "make clean" , "make dep" +and "make bzImage" (or whatever target you want to make). This gets the +kernel compiled with the "-g" option set -- necessary for debugging. + +You have just built the kernel on your DEVELOPMENT machine that you +intend to run on your TARGET machine. + +To install this new kernel, use the following installation procedure. +Remember, you are on the DEVELOPMENT machine patching the kernel source +for the kernel that you intend to run on the TARGET machine. + +Copy this kernel to your target machine using your usual procedures. I +usually arrange to copy development: +/usr/src/linux/arch/i386/boot/bzImage to /vmlinuz on the TARGET machine +via a LAN based NFS access. That is, I run the cp command on the target +and copy from the development machine via the LAN. Run Lilo (see "man +lilo" for details on how to set this up) on the new kernel on the target +machine so that it will boot! Then boot the kernel on the target +machine. + +On the DEVELOPMENT machine, create a file called .gdbinit in the +directory /usr/src/linux. An example .gdbinit file looks like this: + +shell echo -e "\003" >/dev/ttyS0 +set remotebaud 38400 (or what ever speed you have chosen) +target remote /dev/ttyS0 + + +Change the "echo" and "target" definition so that it specifies the tty +port that you intend to use. Change the "remotebaud" definition to +match the data rate that you are going to use for the com line. + +You are now ready to try it out. + +Boot your target machine with "kgdb" in the boot command i.e. something +like: + +lilo> test kgdb + +or if you also want console output thru gdb: + +lilo> test kgdb console=kgdb + +You should see the lilo message saying it has loaded the kernel and then +all output stops. The kgdb stub is trying to connect with gdb. Start +gdb something like this: + + +On your DEVELOPMENT machine, cd /usr/src/linux and enter "gdb vmlinux". +When gdb gets the symbols loaded it will read your .gdbinit file and, if +everything is working correctly, you should see gdb print out a few +lines indicating that a breakpoint has been taken. It will actually +show a line of code in the target kernel inside the kgdb activation +code. + +The gdb interaction should look something like this: + + linux-dev:/usr/src/linux# gdb vmlinux + GDB is free software and you are welcome to distribute copies of it + under certain conditions; type "show copying" to see the conditions. + There is absolutely no warranty for GDB; type "show warranty" for details. + GDB 4.15.1 (i486-slackware-linux), + Copyright 1995 Free Software Foundation, Inc... + breakpoint () at i386-stub.c:750 + 750 } + (gdb) + +You can now use whatever gdb commands you like to set breakpoints. +Enter "continue" to start your target machine executing again. At this +point the target system will run at full speed until it encounters +your breakpoint or gets a segment violation in the kernel, or whatever. + +If you have the kgdb console enabled when you continue, gdb will print +out all the console messages. + +The above example caused a breakpoint relatively early in the boot +process. For the i386 kgdb it is possible to code a break instruction +as the first C-language point in init/main.c, i.e. as the first instruction +in start_kernel(). This could be done as follows: + +#include + breakpoint(); + +This breakpoint() is really a function that sets up the breakpoint and +single-step hardware trap cells and then executes a breakpoint. Any +early hard coded breakpoint will need to use this function. Once the +trap cells are set up they need not be set again, but doing it again +does not hurt anything, so you don't need to be concerned about which +breakpoint is hit first. Once the trap cells are set up (and the kernel +sets them up in due course even if breakpoint() is never called) the +macro: + +BREAKPOINT; + +will generate an inline breakpoint. This may be more useful as it stops +the processor at the instruction instead of in a function a step removed +from the location of interest. In either case must be +included to define both breakpoint() and BREAKPOINT. + +Triggering kgdbstub at other times +================================== + +Often you don't need to enter the debugger until much later in the boot +or even after the machine has been running for some time. Once the +kernel is booted and interrupts are on, you can force the system to +enter the debugger by sending a control-C to the debug port. This is +what the first line of the recommended .gdbinit file does. This allows +you to start gdb any time after the system is up as well as when the +system is already at a breakpoint. (In the case where the system is +already at a breakpoint the control-C is not needed, however, it will +be ignored by the target so no harm is done. Also note the the echo +command assumes that the port speed is already set. This will be true +once gdb has connected, but it is best to set the port speed before you +run gdb.) + +Another simple way to do this is to put the following file in you ~/bin +directory: + +#!/bin/bash +echo -e "\003" > /dev/ttyS0 + +Here, the ttyS0 should be replaced with what ever port you are using. +The "\003" is control-C. Once you are connected with gdb, you can enter +control-C at the command prompt. + +An alternative way to get control to the debugger is to enable the kGdb +SysRq command. Then you would enter Alt-SysRq-g (all three keys at the +same time, but push them down in the order given). To refresh your +memory of the available SysRq commands try Alt-SysRq-=. Actually any +undefined command could replace the "=", but I like to KNOW that what I +am pushing will never be defined. + +Debugging hints +=============== + +You can break into the target machine at any time from the development +machine by typing ^C (see above paragraph). If the target machine has +interrupts enabled this will stop it in the kernel and enter the +debugger. + +There is unfortunately no way of breaking into the kernel if it is +in a loop with interrupts disabled, so if this happens to you then +you need to place exploratory breakpoints or printk's into the kernel +to find out where it is looping. The exploratory breakpoints can be +entered either thru gdb or hard coded into the source. This is very +handy if you do something like: + +if () BREAKPOINT; + + +There is a copy of an e-mail in the Documentation/i386/kgdb/ directory +(debug-nmi.txt) which describes how to create an NMI on an ISA bus +machine using a paper clip. I have a sophisticated version of this made +by wiring a push button switch into a PC104/ISA bus adapter card. The +adapter card nicely furnishes wire wrap pins for all the ISA bus +signals. + +When you are done debugging the kernel on the target machine it is a +good idea to leave it in a running state. This makes reboots faster, +bypassing the fsck. So do a gdb "continue" as the last gdb command if +this is possible. To terminate gdb itself on the development machine +and leave the target machine running, first clear all breakpoints and +continue, then type ^Z to suspend gdb and then kill it with "kill %1" or +something similar. + +If gdbstub Does Not Work +======================== + +If it doesn't work, you will have to troubleshoot it. Do the easy +things first like double checking your cabling and data rates. You +might try some non-kernel based programs to see if the back-to-back +connection works properly. Just something simple like cat /etc/hosts +>/dev/ttyS0 on one machine and cat /dev/ttyS0 on the other will tell you +if you can send data from one machine to the other. Make sure it works +in both directions. There is no point in tearing out your hair in the +kernel if the line doesn't work. + +All of the real action takes place in the file +/usr/src/linux/arch/i386/kernel/kgdb_stub.c. That is the code on the target +machine that interacts with gdb on the development machine. In gdb you can +turn on a debug switch with the following command: + + set remotedebug + +This will print out the protocol messages that gdb is exchanging with +the target machine. + +Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c. This is +the code that talks to the serial port on the target side. There might +be a problem there. In particular there is a section of this code that +tests the UART which will tell you what UART you have if you define +"PRNT" (just remove "_off" from the #define PRNT_off). To view this +report you will need to boot the system without any beakpoints. This +allows the kernel to run to the point where it calls kgdb to set up +interrupts. At this time kgdb will test the UART and print out the type +it finds. (You need to wait so that the printks are actually being +printed. Early in the boot they are cached, waiting for the console to +be enabled. Also, if kgdb is entered thru a breakpoint it is possible +to cause a dead lock by calling printk when the console is locked. The +stub thus avoids doing printks from breakpoints, especially in the +serial code.) At this time, if the UART fails to do the expected thing, +kgdb will print out (using printk) information on what failed. (These +messages will be buried in all the other boot up messages. Look for +lines that start with "gdb_hook_interrupt:". You may want to use dmesg +once the system is up to view the log. If this fails or if you still +don't connect, review your answers for the port address. Use: + +setserial /dev/ttyS0 + +to get the current port and IRQ information. This command will also +tell you what the system found for the UART type. The stub recognizes +the following UART types: + +16450, 16550, and 16550A + +If you are really desperate you can use printk debugging in the +kgdbstub code in the target kernel until you get it working. In particular, +there is a global variable in /usr/src/linux/arch/i386/kernel/kgdb_stub.c +named "remote_debug". Compile your kernel with this set to 1, rather +than 0 and the debug stub will print out lots of stuff as it does +what it does. Likewise there are debug printks in the kgdb_serial.c +code that can be turned on with simple changes in the macro defines. + + +Debugging Loadable Modules +========================== + +This technique comes courtesy of Edouard Parmelan + + +When you run gdb, enter the command + +source gdbinit-modules + +This will read in a file of gdb macros that was installed in your +kernel source directory when kgdb was installed. This file implements +the following commands: + +mod-list + Lists the loaded modules in the form + +mod-print-symbols + Prints all the symbols in the indicated module. + +mod-add-symbols + Loads the symbols from the object file and associates them + with the indicated module. + +After you have loaded the module that you want to debug, use the command +mod-list to find the of your module. Then use that +address in the mod-add-symbols command to load your module's symbols. +From that point onward you can debug your module as if it were a part +of the kernel. + +The file gdbinit-modules also contains a command named mod-add-lis as +an example of how to construct a command of your own to load your +favorite module. The idea is to "can" the pathname of the module +in the command so you don't have to type so much. + +Threads +======= + +Each process in a target machine is seen as a gdb thread. gdb thread +related commands (info threads, thread n) can be used. + +ia-32 hardware breakpoints +========================== + +kgdb stub contains support for hardware breakpoints using debugging features +of ia-32(x86) processors. These breakpoints do not need code modification. +They use debugging registers. 4 hardware breakpoints are available in ia-32 +processors. + +Each hardware breakpoint can be of one of the following three types. + +1. Execution breakpoint - An Execution breakpoint is triggered when code + at the breakpoint address is executed. + + As limited number of hardware breakpoints are available, it is + advisable to use software breakpoints ( break command ) instead + of execution hardware breakpoints, unless modification of code + is to be avoided. + +2. Write breakpoint - A write breakpoint is triggered when memory + location at the breakpoint address is written. + + A write or can be placed for data of variable length. Length of + a write breakpoint indicates length of the datatype to be + watched. Length is 1 for 1 byte data , 2 for 2 byte data, 3 for + 4 byte data. + +3. Access breakpoint - An access breakpoint is triggered when memory + location at the breakpoint address is either read or written. + + Access breakpoints also have lengths similar to write breakpoints. + +IO breakpoints in ia-32 are not supported. + +Since gdb stub at present does not use the protocol used by gdb for hardware +breakpoints, hardware breakpoints are accessed through gdb macros. gdb macros +for hardware breakpoints are described below. + +hwebrk - Places an execution breakpoint + hwebrk breakpointno address +hwwbrk - Places a write breakpoint + hwwbrk breakpointno length address +hwabrk - Places an access breakpoint + hwabrk breakpointno length address +hwrmbrk - Removes a breakpoint + hwrmbrk breakpointno +exinfo - Tells whether a software or hardware breakpoint has occurred. + Prints number of the hardware breakpoint if a hardware breakpoint has + occurred. + +Arguments required by these commands are as follows +breakpointno - 0 to 3 +length - 1 to 3 +address - Memory location in hex digits ( without 0x ) e.g c015e9bc + +SMP support +========== + +When a breakpoint occurs or user issues a break ( Ctrl + C ) to gdb +client, all the processors are forced to enter the debugger. Current +thread corresponds to the thread running on the processor where +breakpoint occurred. Threads running on other processor(s) appear +similar to other non-running threads in the 'info threads' output. +Within the kgdb stub there is a structure "waiting_cpus" in which kgdb +records the values of "current" and "regs" for each CPU other than the +one that hit the breakpoint. "current" is a pointer to the task +structure for the task that CPU is running, while "regs" points to the +saved registers for the task. This structure can be examined with the +gdb "p" command. + +ia-32 hardware debugging registers on all processors are set to same +values. Hence any hardware breakpoints may occur on any processor. + +gdb troubleshooting +=================== + +1. gdb hangs +Kill it. restart gdb. Connect to target machine. + +2. gdb cannot connect to target machine (after killing a gdb and +restarting another) If the target machine was not inside debugger when +you killed gdb, gdb cannot connect because the target machine won't +respond. In this case echo "Ctrl+C"(ASCII 3) to the serial line. +e.g. echo -e "\003" > /dev/ttyS1 +This forces that target machine into the debugger, after which you +can connect. + +3. gdb cannot connect even after echoing Ctrl+C into serial line +Try changing serial line settings min to 1 and time to 0 +e.g. stty min 1 time 0 < /dev/ttyS1 +Try echoing again + +Check serial line speed and set it to correct value if required +e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 + +EVENTS +====== + +Ever want to know the order of things happening? Which CPU did what and +when? How did the spinlock get the way it is? Then events are for +you. Events are defined by calls to an event collection interface and +saved for later examination. In this case, kgdb events are saved by a +very fast bit of code in kgdb which is fully SMP and interrupt protected +and they are examined by using gdb to display them. Kgdb keeps only +the last N events, where N must be a power of two and is defined at +configure time. + + +Events are signaled to kgdb by calling: + +kgdb_ts(data0,data1) + +For each call kgdb records each call in an array along with other info. +Here is the array definition: + +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + long long at_time; + int from_ln; + char * in_src; + void *from; + int with_if; + int data0; + int data1; +}; + +For SMP machines the CPU is recorded, for all machines the TSC is +recorded (gets a time stamp) as well as the line number and source file +the call was made from. The address of the (from), the "if" (interrupt +flag) and the two data items are also recorded. The macro kgdb_ts casts +the types to int, so you can put any 32-bit values here. There is a +configure option to select the number of events you want to keep. A +nice number might be 128, but you can keep up to 1024 if you want. The +number must be a power of two. An "andthen" macro library is provided +for gdb to help you look at these events. It is also possible to define +a different structure for the event storage and cast the data to this +structure. For example the following structure is defined in kgdb: + +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + long long at_time; + int from_ln; + char * in_src; + void *from; + int with_if; + struct task_struct *t1; + struct task_struct *t2; +}; + +If you use this for display, the data elements will be displayed as +pointers to task_struct entries. You may want to define your own +structure to use in casting. You should only change the last two items +and you must keep the structure size the same. Kgdb will handle these +as 32-bit ints, but within that constraint you can define a structure to +cast to any 32-bit quantity. This need only be available to gdb and is +only used for casting in the display code. + +Final Items +=========== + +I picked up this code from Amit S. Kale and enhanced it. + +If you make some really cool modification to this stuff, or if you +fix a bug, please let me know. + +George Anzinger + + +Amit S. Kale + + +(First kgdb by David Grothe ) + +(modified by Tigran Aivazian ) + Putting gdbstub into the kernel config menu. + +(modified by Scott Foehner ) + Hooks for entering gdbstub at boot time. + +(modified by Amit S. Kale ) + Threads, ia-32 hw debugging, mp support, console support, + nmi watchdog handling. + +(modified by George Anzinger ) + Extended threads to include the idle threads. + Enhancements to allow breakpoint() at first C code. + Use of module_init() and __setup() to automate the configure. + Enhanced the cpu "collection" code to work in early bring-up. + Added ability to call functions from gdb + Print info thread stuff without going back to schedule() + Now collect the "other" cpus with an IPI/ NMI. --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/i386/kgdb/loadmodule.sh 2004-01-21 23:26:58.000000000 -0800 @@ -0,0 +1,78 @@ +#/bin/sh +# This script loads a module on a target machine and generates a gdb script. +# source generated gdb script to load the module file at appropriate addresses +# in gdb. +# +# Usage: +# Loading the module on target machine and generating gdb script) +# [foo]$ loadmodule.sh +# +# Loading the module file into gdb +# (gdb) source +# +# Modify following variables according to your setup. +# TESTMACHINE - Name of the target machine +# GDBSCRIPTS - The directory where a gdb script will be generated +# +# Author: Amit S. Kale (akale@veritas.com). +# +# If you run into problems, please check files pointed to by following +# variables. +# ERRFILE - /tmp/.errs contains stderr output of insmod +# MAPFILE - /tmp/.map contains stdout output of insmod +# GDBSCRIPT - $GDBSCRIPTS/load gdb script. + +TESTMACHINE=foo +GDBSCRIPTS=/home/bar + +if [ $# -lt 1 ] ; then { + echo Usage: $0 modulefile + exit +} ; fi + +MODULEFILE=$1 +MODULEFILEBASENAME=`basename $1` + +if [ $MODULEFILE = $MODULEFILEBASENAME ] ; then { + MODULEFILE=`pwd`/$MODULEFILE +} fi + +ERRFILE=/tmp/$MODULEFILEBASENAME.errs +MAPFILE=/tmp/$MODULEFILEBASENAME.map +GDBSCRIPT=$GDBSCRIPTS/load$MODULEFILEBASENAME + +function findaddr() { + local ADDR=0x$(echo "$SEGMENTS" | \ + grep "$1" | sed 's/^[^ ]*[ ]*[^ ]*[ ]*//' | \ + sed 's/[ ]*[^ ]*$//') + echo $ADDR +} + +function checkerrs() { + if [ "`cat $ERRFILE`" != "" ] ; then { + cat $ERRFILE + exit + } fi +} + +#load the module +echo Copying $MODULEFILE to $TESTMACHINE +rcp $MODULEFILE root@${TESTMACHINE}: + +echo Loading module $MODULEFILE +rsh -l root $TESTMACHINE /sbin/insmod -m ./`basename $MODULEFILE` \ + > $MAPFILE 2> $ERRFILE +checkerrs + +SEGMENTS=`head -n 11 $MAPFILE | tail -n 10` +TEXTADDR=$(findaddr "\\.text[^.]") +LOADSTRING="add-symbol-file $MODULEFILE $TEXTADDR" +SEGADDRS=`echo "$SEGMENTS" | awk '//{ + if ($1 != ".text" && $1 != ".this" && + $1 != ".kstrtab" && $1 != ".kmodtab") { + print " -s " $1 " 0x" $3 " " + } +}'` +LOADSTRING="$LOADSTRING $SEGADDRS" +echo Generating script $GDBSCRIPT +echo $LOADSTRING > $GDBSCRIPT --- linux-2.6.2-rc1/Documentation/i386/zero-page.txt 2004-01-20 21:51:17.000000000 -0800 +++ 25/Documentation/i386/zero-page.txt 2004-01-21 23:30:45.000000000 -0800 @@ -72,8 +72,10 @@ Offset Type Description 0x21c unsigned long INITRD_SIZE, size in bytes of ramdisk image 0x220 4 bytes (setup.S) 0x224 unsigned short setup.S heap end pointer +0x2cc 4 bytes DISK80_SIG_BUFFER (setup.S) 0x2d0 - 0x600 E820MAP -0x600 - 0x7D4 EDDBUF (setup.S) +0x600 - 0x7ff EDDBUF (setup.S) for disk signature read sector +0x600 - 0x7d3 EDDBUF (setup.S) for edd data 0x800 string, 2K max COMMAND_LINE, the kernel commandline as copied using CL_OFFSET. --- linux-2.6.2-rc1/Documentation/ide.txt 2003-10-17 15:58:03.000000000 -0700 +++ 25/Documentation/ide.txt 2004-01-21 23:30:41.000000000 -0800 @@ -198,12 +198,11 @@ drivers can always be compiled as loadab can only be compiled into the kernel, and the core code (ide.c) can be compiled as a loadable module provided no chipset support is needed. -When using ide.c/ide-tape.c as modules in combination with kerneld, add: +When using ide.c as a module in combination with kmod, add: alias block-major-3 ide-probe - alias char-major-37 ide-tape -respectively to /etc/modules.conf. +to /etc/modprobe.conf. When ide.c is used as a module, you can pass command line parameters to the driver using the "options=" keyword to insmod, while replacing any ',' with --- linux-2.6.2-rc1/Documentation/kernel-parameters.txt 2004-01-20 21:51:17.000000000 -0800 +++ 25/Documentation/kernel-parameters.txt 2004-01-21 23:27:12.000000000 -0800 @@ -96,6 +96,23 @@ running once the system is up. ht -- run only enough ACPI to enable Hyper Threading See also Documentation/pm.txt. + acpi_pic_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode + Format: { level | edge } + level Force PIC-mode SCI to Level Trigger (default) + edge Force PIC-mode SCI to Edge Trigge + + acpi_irq_balance [HW,ACPI] ACPI will balance active IRQs + default in APIC mode + + acpi_irq_nobalance [HW,ACPI] ACPI will not move active IRQs (default) + default in PIC mode + + acpi_irq_pci= [HW,ACPI] If irq_balance, Clear listed IRQs for use by PCI + Format: ,... + + acpi_irq_isa= [HW,ACPI] If irq_balance, Mark listed IRQs used by ISA + Format: ,... + ad1816= [HW,OSS] Format: ,,, See also Documentation/sound/oss/AD1816. @@ -220,7 +237,7 @@ running once the system is up. Forces specified timesource (if avaliable) to be used when calculating gettimeofday(). If specicified timesource is not avalible, it defaults to PIT. - Format: { pit | tsc | cyclone | ... } + Format: { pit | tsc | cyclone | pmtmr } hpet= [IA-32,HPET] option to disable HPET and use PIT. Format: disable --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25/Documentation/laptop-mode.txt 2004-01-21 23:29:57.000000000 -0800 @@ -0,0 +1,480 @@ +How to conserve battery power using laptop-mode +----------------------------------------------- + +Document Author: Bart Samwel (bart@samwel.tk) +Date created: January 2, 2004 + +Introduction +------------ + +Laptopmode is used to minimize the time that the hard disk needs to be spun up, +to conserve battery power on laptops. It has been reported to cause significant +power savings. + +Contents +-------- + +* Introduction +* The short story +* Caveats +* The details +* Tips & Tricks +* Control script +* ACPI integration +* Monitoring tool + + +The short story +--------------- + +If you just want to use it, run the laptop_mode control script (which is included +at the end of this document) as follows: + +# laptop_mode start + +Then set your harddisk spindown time to a relatively low value with hdparm: + +hdparm -S 4 /dev/hda + +The value -S 4 means 20 seconds idle time before spindown. Your harddisk will +now only spin up when a disk cache miss occurs, or at least once every 10 +minutes to write back any pending changes. + +To stop laptop_mode, remount your filesystems with regular commit intervals +(e.g., 5 seconds), and run "laptop_mode stop". + + +Caveats +------- + +* The downside of laptop mode is that you have a chance of losing up + to 10 minutes of work. If you cannot afford this, don't use it! + +* Most desktop hard drives have a very limited lifetime measured in spindown + cycles, typically about 50.000 times (it's usually listed on the spec sheet). + Check your drive's rating, and don't wear down your drive's lifetime if you + don't need to. + +* If you mount some of your ext3/reiserfs filesystems with the -n option, then + the control script will not be able to remount them correctly. You must set + DO_REMOUNTS=0 in the control script, otherwise it will remount them with the + wrong options -- or it will fail because it cannot write to /etc/mtab. + +* If you have your filesystems listed as type "auto" in fstab, like I did, then + the control script will not recognize them as filesystems that need remounting. + +The details +----------- + +Laptop-mode is controlled by the flag /proc/sys/vm/laptop_mode. When this +flag is set, any physical disk read operation (that might have caused the +hard disk to spin up) causes Linux to flush all dirty blocks. The result +of this is that after a disk has spun down, it will not be spun up anymore +to write dirty blocks, because those blocks had already been written +immediately after the most recent read operation + +To increase the effectiveness of the laptop_mode strategy, the laptop_mode +control script increases dirty_expire_centisecs and dirty_writeback_centisecs in +/proc/sys/vm to about 10 minutes (by default), which means that pages that are +dirtied are not forced to be written to disk as often. The control script also +changes the dirty background ratio, so that background writeback of dirty pages +is not done anymore. Combined with a higher commit value (also 10 minutes) for +ext3 or ReiserFS filesystems (also done automatically by the control script), +this results in concentration of disk activity in a small time interval which +occurs only once every 10 minutes, or whenever the disk is forced to spin up by +a cache miss. The disk can then be spun down in the periods of inactivity. + +If you want to find out which process caused the disk to spin up, you can +gather information by setting the flag /proc/sys/vm/block_dump. When this flag +is set, Linux reports all disk read and write operations that take place, and +all block dirtyings done to files. This makes it possible to debug why a disk +needs to spin up, and to increase battery life even more. + +If 10 minutes is too much or too little downtime for you, you can configure +this downtime as follows. In the control script, set the MAX_AGE value to the +maximum number of seconds of disk downtime that you would like. You should +then set your filesystem's commit interval to the same value. The dirty ratio +is also configurable from the control script. + +If you don't like the idea of the control script remounting your filesystems +for you, you can change DO_REMOUNTS to 0 in the script. + +Thanks to Kiko Piris, the control script can be used to enable laptop mode on +both the Linux 2.4 and 2.6 series. + + +Tips & Tricks +------------- + +* Bartek Kania reports getting up to 50 minutes of extra battery life (on top + of his regular 3 to 3.5 hours) using very aggressive power management (hdparm + -B1) and a spindown time of 5 seconds (hdparm -S1). + +* You can spin down the disk while playing MP3, by setting the disk readahead + to 8MB (hdparm -a 16384). Effectively, the disk will read a complete MP3 at + once, and will then spin down while the MP3 is playing. (Thanks to Bartek + Kania.) + +* Drew Scott Daniels observed: "I don't know why, but when I decrease the number + of colours that my display uses it consumes less battery power. I've seen + this on powerbooks too. I hope that this is a piece of information that + might be useful to the Laptop Mode patch or it's users." + + +Control script +-------------- + +Please note that this control script works for the Linux 2.4 and 2.6 series. + +--------------------CONTROL SCRIPT BEGIN------------------------------------------ +#!/bin/sh + +# start or stop laptop_mode, best run by a power management daemon when +# ac gets connected/disconnected from a laptop +# +# install as /sbin/laptop_mode +# +# Contributors to this script: Kiko Piris +# Bart Samwel +# Dax Kelson +# Original Linux 2.4 version by: Jens Axboe + +parse_mount_opts () { + echo "$*" | \ + sed 's/commit=[0-9]*//g' | \ + sed 's/,,*/,/g' | \ + sed 's/^,//' | \ + sed 's/,$//' | \ + cat - +} + +KLEVEL="$(uname -r | cut -c1-3)" +case "$KLEVEL" in + "2.4") + true + ;; + "2.6") + true + ;; + *) + echo "Unhandled kernel level: $KLEVEL ('uname -r' = '$(uname -r)')" + exit 1 + ;; +esac + +# Shall we remount journaled fs. with appropiate commit interval? (1=yes) +DO_REMOUNTS=1 + +# age time, in seconds. should be put into a sysconfig file +MAX_AGE=600 + +# Allowed dirty ratio, in pct. should be put into a sysconfig file as well. +DIRTY_RATIO=40 + +# kernel default dirty buffer age +DEF_AGE=30 +DEF_UPDATE=5 +DEF_DIRTY_BACKGROUND_RATIO=10 +DEF_DIRTY_RATIO=40 + + +if [ ! -e /proc/sys/vm/laptop_mode ]; then + echo "Kernel is not patched with laptop_mode patch." + exit 1 +fi + +if [ ! -w /proc/sys/vm/laptop_mode ]; then + echo "You do not have enough privileges to enable laptop_mode." + exit 1 +fi + +case "$1" in + start) + AGE=$((100*$MAX_AGE)) + echo -n "Starting laptop_mode" + case "$KLEVEL" in + "2.4") + echo "1" > /proc/sys/vm/laptop_mode + echo "30 500 0 0 $AGE $AGE 60 20 0" > /proc/sys/vm/bdflush + ;; + "2.6") + echo "1" > /proc/sys/vm/laptop_mode + echo "$AGE" > /proc/sys/vm/dirty_writeback_centisecs + echo "$AGE" > /proc/sys/vm/dirty_expire_centisecs + echo "$DIRTY_RATIO" > /proc/sys/vm/dirty_ratio + echo "$DIRTY_RATIO" > /proc/sys/vm/dirty_background_ratio + ;; + esac + if [ $DO_REMOUNTS -eq 1 ]; then + cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do + PARSEDOPTS="$(parse_mount_opts "$OPTS")" + case "$FST" in + "ext3") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE ;; + "reiserfs") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE ;; + "xfs") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE ;; + esac + done + fi + echo "." + ;; + stop) + U_AGE=$((100*$DEF_UPDATE)) + B_AGE=$((100*$DEF_AGE)) + echo -n "Stopping laptop_mode" + case "$KLEVEL" in + "2.4") + echo "0" > /proc/sys/vm/laptop_mode + echo "30 500 0 0 $U_AGE $B_AGE 60 20 0" > /proc/sys/vm/bdflush + ;; + "2.6") + echo "0" > /proc/sys/vm/laptop_mode + echo "$U_AGE" > /proc/sys/vm/dirty_writeback_centisecs + echo "$B_AGE" > /proc/sys/vm/dirty_expire_centisecs + echo "$DEF_DIRTY_RATIO" > /proc/sys/vm/dirty_ratio + echo "$DEF_DIRTY_BACKGROUND_RATIO" > /proc/sys/vm/dirty_background_ratio + ;; + esac + if [ $DO_REMOUNTS -eq 1 ]; then + cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do + PARSEDOPTS="$(parse_mount_opts "$OPTS")" + case "$FST" in + "ext3") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS ;; + "reiserfs") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS ;; + "xfs") mount $DEV -t $FST $MP -o remount,$PARSEDOPTS ;; + esac + done + fi + echo "." + ;; + *) + echo "$0 {start|stop}" + ;; + +esac + +exit 0 + +--------------------CONTROL SCRIPT END-------------------------------------------- + + +ACPI integration +---------------- + +Dax Kelson submitted this so that the ACPI acpid daemon will +kick off the laptop_mode script and run hdparm. + +---------------------------/etc/acpi/events/ac_adapter BEGIN------------------------------------------- +event=ac_adapter +action=/etc/acpi/actions/battery.sh +---------------------------/etc/acpi/events/ac_adapter END------------------------------------------- + +---------------------------/etc/acpi/actions/battery.sh BEGIN------------------------------------------- +#!/bin/sh + +# cpu throttling +# cat /proc/acpi/processor/CPU0/throttling for more info +ACAD_THR=0 +BATT_THR=2 + +# spindown time for HD (man hdparm for valid values) +# I prefer 2 hours for acad and 20 seconds for batt +ACAD_HD=244 +BATT_HD=4 + +# ac/battery event handler + +status=`awk '/^state: / { print $2 }' /proc/acpi/ac_adapter/AC/state` + +case $status in + "on-line") + echo "Setting HD spindown to 2 hours" + /sbin/laptop-mode stop + /sbin/hdparm -S $ACAD_HD /dev/hda > /dev/null 2>&1 + /sbin/hdparm -B 255 /dev/hda > /dev/null 2>&1 + #echo -n $ACAD_CPU:$ACAD_THR > /proc/acpi/processor/CPU0/limit + exit 0 + ;; + "off-line") + echo "Setting HD spindown to 20 seconds" + /sbin/laptop-mode start + /sbin/hdparm -S $BATT_HD /dev/hda > /dev/null 2>&1 + /sbin/hdparm -B 1 /dev/hda > /dev/null 2>&1 + #echo -n $BATT_CPU:$BATT_THR > /proc/acpi/processor/CPU0/limit + exit 0 + ;; +esac +---------------------------/etc/acpi/actions/battery.sh END------------------------------------------- + +Monitoring tool +--------------- + +Bartek Kania submitted this, it can be used to measure how much time your disk +spends spun up/down. + +---------------------------dslm.c BEGIN------------------------------------------- +/* + * Simple Disk SLeep Monitor + * by Bartek Kania + * Licenced under the GPL + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define D(x) x +#else +#define D(x) +#endif + +int endit = 0; + +/* Check if the disk is in powersave-mode + * Most of the code is stolen from hdparm. + * 1 = active, 0 = standby/sleep, -1 = unknown */ +int check_powermode(int fd) +{ + unsigned char args[4] = {WIN_CHECKPOWERMODE1,0,0,0}; + int state; + + if (ioctl(fd, HDIO_DRIVE_CMD, &args) + && (args[0] = WIN_CHECKPOWERMODE2) /* try again with 0x98 */ + && ioctl(fd, HDIO_DRIVE_CMD, &args)) { + if (errno != EIO || args[0] != 0 || args[1] != 0) { + state = -1; /* "unknown"; */ + } else + state = 0; /* "sleeping"; */ + } else { + state = (args[2] == 255) ? 1 : 0; + } + D(printf(" drive state is: %s\n", state)); + + return state; +} + +char *state_name(int i) +{ + if (i == -1) return "unknown"; + if (i == 0) return "sleeping"; + if (i == 1) return "active"; + + return "internal error"; +} + +char *myctime(time_t time) +{ + char *ts = ctime(&time); + ts[strlen(ts) - 1] = 0; + + return ts; +} + +void measure(int fd) +{ + time_t start_time; + int last_state; + time_t last_time; + int curr_state; + time_t curr_time = 0; + time_t time_diff; + time_t active_time = 0; + time_t sleep_time = 0; + time_t unknown_time = 0; + time_t total_time = 0; + int changes = 0; + float tmp; + + printf("Starting measurements\n"); + + last_state = check_powermode(fd); + start_time = last_time = time(0); + printf(" System is in state %s\n\n", state_name(last_state)); + + while(!endit) { + sleep(1); + curr_state = check_powermode(fd); + + if (curr_state != last_state || endit) { + changes++; + curr_time = time(0); + time_diff = curr_time - last_time; + + if (last_state == 1) active_time += time_diff; + else if (last_state == 0) sleep_time += time_diff; + else unknown_time += time_diff; + + last_state = curr_state; + last_time = curr_time; + + printf("%s: State-change to %s\n", myctime(curr_time), + state_name(curr_state)); + } + } + changes--; /* Compensate for SIGINT */ + + total_time = time(0) - start_time; + printf("\nTotal running time: %lus\n", curr_time - start_time); + printf(" State changed %d times\n", changes); + + tmp = (float)sleep_time / (float)total_time * 100; + printf(" Time in sleep state: %lus (%.2f%%)\n", sleep_time, tmp); + tmp = (float)active_time / (float)total_time * 100; + printf(" Time in active state: %lus (%.2f%%)\n", active_time, tmp); + tmp = (float)unknown_time / (float)total_time * 100; + printf(" Time in unknown state: %lus (%.2f%%)\n", unknown_time, tmp); +} + +void ender(int s) +{ + endit = 1; +} + +void usage() +{ + puts("usage: dslm [-w