--- linux-2.6.5-rc1/arch/alpha/Kconfig 2004-03-10 20:41:24.000000000 -0800 +++ 25/arch/alpha/Kconfig 2004-03-17 19:30:30.357208864 -0800 @@ -495,8 +495,8 @@ config SMP Y to "Enhanced Real Time Clock Support", below. The "Advanced Power Management" code will be disabled if you say Y here. - See also the , - , , + See also the , + , and the SMP-HOWTO available at . --- linux-2.6.5-rc1/arch/arm26/defconfig 2003-06-14 12:18:04.000000000 -0700 +++ 25/arch/arm26/defconfig 2004-03-17 19:29:21.047745504 -0800 @@ -185,9 +185,6 @@ CONFIG_SOUND_GAMEPORT=y # CONFIG_I2C=y CONFIG_I2C_ALGOBIT=y -# CONFIG_I2C_PHILIPSPAR is not set -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set CONFIG_I2C_ALGOPCF=y # CONFIG_I2C_ELEKTOR is not set CONFIG_I2C_CHARDEV=y --- linux-2.6.5-rc1/arch/arm26/Kconfig 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/arm26/Kconfig 2004-03-17 19:29:21.046745656 -0800 @@ -198,11 +198,6 @@ source "drivers/input/Kconfig" source "drivers/char/Kconfig" -config KBDMOUSE - bool - depends on ARCH_ACORN && BUSMOUSE=y - default y - source "drivers/media/Kconfig" source "fs/Kconfig" --- linux-2.6.5-rc1/arch/arm/configs/a5k_defconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/configs/a5k_defconfig 2004-03-17 19:29:21.037747024 -0800 @@ -317,9 +317,6 @@ CONFIG_SERIAL_CONSOLE=y # CONFIG_I2C=y CONFIG_I2C_ALGOBIT=y -# CONFIG_I2C_PHILIPSPAR is not set -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set # CONFIG_I2C_ALGOPCF is not set CONFIG_I2C_CHARDEV=y --- linux-2.6.5-rc1/arch/arm/configs/badge4_defconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/configs/badge4_defconfig 2004-03-17 19:29:21.038746872 -0800 @@ -690,9 +690,6 @@ CONFIG_UNIX98_PTY_COUNT=256 # CONFIG_I2C=m CONFIG_I2C_ALGOBIT=m -# CONFIG_I2C_PHILIPSPAR is not set -CONFIG_I2C_ELV=m -CONFIG_I2C_VELLEMAN=m # CONFIG_I2C_BIT_SA1100_GPIO is not set CONFIG_I2C_ALGOPCF=m CONFIG_I2C_ELEKTOR=m @@ -735,7 +732,6 @@ CONFIG_SOFT_WATCHDOG=m CONFIG_SA1100_WATCHDOG=m # CONFIG_EUROTECH_WDT is not set # CONFIG_IB700_WDT is not set -# CONFIG_I810_TCO is not set # CONFIG_MIXCOMWD is not set # CONFIG_60XX_WDT is not set # CONFIG_W83877F_WDT is not set --- linux-2.6.5-rc1/arch/arm/configs/cerfcube_defconfig 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/arm/configs/cerfcube_defconfig 2004-03-17 19:29:21.038746872 -0800 @@ -573,7 +573,6 @@ CONFIG_WATCHDOG=y CONFIG_SA1100_WATCHDOG=m # CONFIG_EUROTECH_WDT is not set # CONFIG_IB700_WDT is not set -# CONFIG_I810_TCO is not set # CONFIG_MIXCOMWD is not set # CONFIG_SCx200_WDT is not set # CONFIG_60XX_WDT is not set --- linux-2.6.5-rc1/arch/arm/configs/clps7500_defconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/configs/clps7500_defconfig 2004-03-17 19:29:21.039746720 -0800 @@ -340,9 +340,6 @@ CONFIG_PRINTER=y # CONFIG_I2C=y CONFIG_I2C_ALGOBIT=y -# CONFIG_I2C_PHILIPSPAR is not set -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set # CONFIG_I2C_ALGOPCF is not set # CONFIG_I2C_CHARDEV is not set --- linux-2.6.5-rc1/arch/arm/configs/ebsa110_defconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/configs/ebsa110_defconfig 2004-03-17 19:29:21.039746720 -0800 @@ -475,7 +475,6 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_ADVANTECH_WDT is not set # CONFIG_60XX_WDT is not set # CONFIG_MIXCOMWD is not set -# CONFIG_I810_TCO is not set # CONFIG_MACHZ_WDT is not set # CONFIG_INTEL_RNG is not set # CONFIG_NVRAM is not set --- linux-2.6.5-rc1/arch/arm/configs/footbridge_defconfig 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/arm/configs/footbridge_defconfig 2004-03-17 19:29:21.040746568 -0800 @@ -525,7 +525,6 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_ACQUIRE_WDT is not set # CONFIG_60XX_WDT is not set # CONFIG_MIXCOMWD is not set -# CONFIG_I810_TCO is not set CONFIG_21285_WATCHDOG=m CONFIG_977_WATCHDOG=m CONFIG_DS1620=y --- linux-2.6.5-rc1/arch/arm/configs/neponset_defconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/configs/neponset_defconfig 2004-03-17 19:29:21.041746416 -0800 @@ -500,8 +500,6 @@ CONFIG_UNIX98_PTY_COUNT=32 # CONFIG_I2C=y CONFIG_I2C_ALGOBIT=y -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set # CONFIG_SCx200_ACB is not set CONFIG_I2C_BIT_SA1100_GPIO=y # CONFIG_I2C_ALGOPCF is not set @@ -536,7 +534,6 @@ CONFIG_WATCHDOG=y CONFIG_SA1100_WATCHDOG=m # CONFIG_EUROTECH_WDT is not set # CONFIG_IB700_WDT is not set -# CONFIG_I810_TCO is not set # CONFIG_MIXCOMWD is not set # CONFIG_SCx200_WDT is not set # CONFIG_60XX_WDT is not set --- linux-2.6.5-rc1/arch/arm/configs/netwinder_defconfig 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/arm/configs/netwinder_defconfig 2004-03-17 19:29:21.041746416 -0800 @@ -617,7 +617,6 @@ CONFIG_WATCHDOG=y CONFIG_977_WATCHDOG=y # CONFIG_EUROTECH_WDT is not set # CONFIG_IB700_WDT is not set -# CONFIG_I810_TCO is not set # CONFIG_MIXCOMWD is not set # CONFIG_SCx200_WDT is not set # CONFIG_60XX_WDT is not set --- linux-2.6.5-rc1/arch/arm/configs/pfs168_mqtft_defconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/configs/pfs168_mqtft_defconfig 2004-03-17 19:29:21.042746264 -0800 @@ -506,9 +506,6 @@ CONFIG_UNIX98_PTY_COUNT=32 # CONFIG_I2C=y CONFIG_I2C_ALGOBIT=y -# CONFIG_I2C_PHILIPSPAR is not set -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set # CONFIG_I2C_ASSABET is not set CONFIG_I2C_PFS168=y # CONFIG_I2C_ALGOPCF is not set --- linux-2.6.5-rc1/arch/arm/configs/pfs168_mqvga_defconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/configs/pfs168_mqvga_defconfig 2004-03-17 19:29:21.043746112 -0800 @@ -506,9 +506,6 @@ CONFIG_UNIX98_PTY_COUNT=32 # CONFIG_I2C=y CONFIG_I2C_ALGOBIT=y -# CONFIG_I2C_PHILIPSPAR is not set -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set # CONFIG_I2C_ASSABET is not set CONFIG_I2C_PFS168=y # CONFIG_I2C_ALGOPCF is not set --- linux-2.6.5-rc1/arch/arm/configs/pfs168_sastn_defconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/configs/pfs168_sastn_defconfig 2004-03-17 19:29:21.043746112 -0800 @@ -507,9 +507,6 @@ CONFIG_UNIX98_PTY_COUNT=32 # CONFIG_I2C=y CONFIG_I2C_ALGOBIT=y -# CONFIG_I2C_PHILIPSPAR is not set -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set # CONFIG_I2C_ASSABET is not set # CONFIG_I2C_ALGOPCF is not set CONFIG_I2C_CHARDEV=y --- linux-2.6.5-rc1/arch/arm/configs/pfs168_satft_defconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/configs/pfs168_satft_defconfig 2004-03-17 19:29:21.044745960 -0800 @@ -506,9 +506,6 @@ CONFIG_UNIX98_PTY_COUNT=32 # CONFIG_I2C=y CONFIG_I2C_ALGOBIT=y -# CONFIG_I2C_PHILIPSPAR is not set -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set # CONFIG_I2C_ASSABET is not set CONFIG_I2C_PFS168=y # CONFIG_I2C_ALGOPCF is not set --- linux-2.6.5-rc1/arch/arm/configs/rpc_defconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/configs/rpc_defconfig 2004-03-17 19:29:21.044745960 -0800 @@ -462,9 +462,6 @@ CONFIG_PRINTER=m # CONFIG_I2C=y CONFIG_I2C_ALGOBIT=y -# CONFIG_I2C_PHILIPSPAR is not set -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set # CONFIG_SCx200_ACB is not set # CONFIG_I2C_ALGOPCF is not set CONFIG_I2C_CHARDEV=y --- linux-2.6.5-rc1/arch/arm/configs/shannon_defconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/configs/shannon_defconfig 2004-03-17 19:29:21.045745808 -0800 @@ -476,7 +476,6 @@ CONFIG_WATCHDOG=y CONFIG_SA1100_WATCHDOG=y # CONFIG_EUROTECH_WDT is not set # CONFIG_IB700_WDT is not set -# CONFIG_I810_TCO is not set # CONFIG_MIXCOMWD is not set # CONFIG_60XX_WDT is not set # CONFIG_W83877F_WDT is not set --- linux-2.6.5-rc1/arch/arm/configs/trizeps_defconfig 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm/configs/trizeps_defconfig 2004-03-17 19:29:21.046745656 -0800 @@ -624,9 +624,6 @@ CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_I2C=m CONFIG_I2C_ALGOBIT=m -# CONFIG_I2C_PHILIPSPAR is not set -# CONFIG_I2C_ELV is not set -# CONFIG_I2C_VELLEMAN is not set # CONFIG_SCx200_I2C is not set # CONFIG_SCx200_ACB is not set # CONFIG_I2C_BIT_SA1100_GPIO is not set --- linux-2.6.5-rc1/arch/arm/Kconfig 2004-03-15 22:21:04.000000000 -0800 +++ 25/arch/arm/Kconfig 2004-03-17 19:30:30.358208712 -0800 @@ -351,7 +351,7 @@ config CPU_FREQ_INTEGRATOR help This enables the CPUfreq driver for ARM Integrator CPUs. - For details, take a look at linux/Documentation/cpufreq. + For details, take a look at linux/Documentation/cpu-freq. If in doubt, say Y. --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/arch/i386/boot/edd.S 2004-03-17 19:31:05.288898440 -0800 @@ -0,0 +1,127 @@ +/* + * BIOS Enhanced Disk Drive support + * by Matt Domsch October 2002 + * conformant to T13 Committee www.t13.org + * projects 1572D, 1484D, 1386D, 1226DT + * disk signature read by Matt Domsch + * and Andrew Wilks September 2003 + * legacy CHS retreival by Patrick J. LoPresti + * March 2004 + */ + +#include + +#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) +# Read the first sector of device 80h and store the 4-byte signature + movl $0xFFFFFFFF, %eax + movl %eax, (DISK80_SIG_BUFFER) # assume failure + movb $READ_SECTORS, %ah + movb $1, %al # read 1 sector + movb $0x80, %dl # from device 80 + movb $0, %dh # at head 0 + movw $1, %cx # cylinder 0, sector 0 + pushw %es + pushw %ds + popw %es + movw $EDDBUF, %bx + pushw %dx # work around buggy BIOSes + stc # work around buggy BIOSes + int $0x13 + sti # work around buggy BIOSes + popw %dx + jc disk_sig_done + movl (EDDBUF+MBR_SIG_OFFSET), %eax + movl %eax, (DISK80_SIG_BUFFER) # store success +disk_sig_done: + popw %es + +# Do the BIOS Enhanced Disk Drive calls +# This consists of two calls: +# int 13h ah=41h "Check Extensions Present" +# int 13h ah=48h "Get Device Parameters" +# int 13h ah=08h "Legacy Get Device Parameters" +# +# A buffer of size EDDMAXNR*(EDDEXTSIZE+EDDPARMSIZE) is reserved for our use +# in the boot_params at EDDBUF. The first four bytes of which are +# used to store the device number, interface support map and version +# results from fn41. The next four bytes are used to store the legacy +# cylinders, heads, and sectors from fn08. The following 74 bytes are used to +# store the results from fn48. Starting from device 80h, fn41, then fn48 +# are called and their results stored in EDDBUF+n*(EDDEXTSIZE+EDDPARMIZE). +# Then the pointer is incremented to store the data for the next call. +# This repeats until either a device doesn't exist, or until EDDMAXNR +# devices have been stored. +# The one tricky part is that ds:si always points EDDEXTSIZE bytes into +# the structure, and the fn41 and fn08 results are stored at offsets +# from there. This removes the need to increment the pointer for +# every store, and leaves it ready for the fn48 call. +# A second one-byte buffer, EDDNR, in the boot_params stores +# the number of BIOS devices which exist, up to EDDMAXNR. +# In setup.c, copy_edd() stores both boot_params buffers away +# for later use, as they would get overwritten otherwise. +# This code is sensitive to the size of the structs in edd.h +edd_start: + # %ds points to the bootsector + # result buffer for fn48 + movw $EDDBUF+EDDEXTSIZE, %si # in ds:si, fn41 results + # kept just before that + movb $0, (EDDNR) # zero value at EDDNR + movb $0x80, %dl # BIOS device 0x80 + +edd_check_ext: + movb $CHECKEXTENSIONSPRESENT, %ah # Function 41 + movw $EDDMAGIC1, %bx # magic + int $0x13 # make the call + jc edd_done # no more BIOS devices + + cmpw $EDDMAGIC2, %bx # is magic right? + jne edd_next # nope, next... + + movb %dl, %ds:-8(%si) # store device number + movb %ah, %ds:-7(%si) # store version + movw %cx, %ds:-6(%si) # store extensions + incb (EDDNR) # note that we stored something + +edd_get_device_params: + movw $EDDPARMSIZE, %ds:(%si) # put size + movw $0x0, %ds:2(%si) # work around buggy BIOSes + movb $GETDEVICEPARAMETERS, %ah # Function 48 + int $0x13 # make the call + # Don't check for fail return + # it doesn't matter. +edd_get_legacy_chs: + xorw %ax, %ax + movw %ax, %ds:-4(%si) + movw %ax, %ds:-2(%si) + # Ralf Brown's Interrupt List says to set ES:DI to + # 0000h:0000h "to guard against BIOS bugs" + pushw %es + movw %ax, %es + movw %ax, %di + pushw %dx # legacy call clobbers %dl + movb $LEGACYGETDEVICEPARAMETERS, %ah # Function 08 + int $0x13 # make the call + jc edd_legacy_done # failed + movb %cl, %al # Low 6 bits are max + andb $0x3F, %al # sector number + movb %al, %ds:-1(%si) # Record max sect + movb %dh, %ds:-2(%si) # Record max head number + movb %ch, %al # Low 8 bits of max cyl + shr $6, %cl + movb %cl, %ah # High 2 bits of max cyl + movw %ax, %ds:-4(%si) + +edd_legacy_done: + popw %dx + popw %es + movw %si, %ax # increment si + addw $EDDPARMSIZE+EDDEXTSIZE, %ax + movw %ax, %si + +edd_next: + incb %dl # increment to next device + cmpb $EDDMAXNR, (EDDNR) # Out of space? + jb edd_check_ext # keep looping + +edd_done: +#endif --- linux-2.6.5-rc1/arch/i386/boot/setup.S 2004-03-15 22:21:04.000000000 -0800 +++ 25/arch/i386/boot/setup.S 2004-03-17 19:31:17.663017288 -0800 @@ -44,15 +44,6 @@ * * New A20 code ported from SYSLINUX by H. Peter Anvin. AMD Elan bugfixes * by Robert Schwebel, December 2001 - * - * BIOS Enhanced Disk Drive support - * by Matt Domsch October 2002 - * conformant to T13 Committee www.t13.org - * projects 1572D, 1484D, 1386D, 1226DT - * disk signature read by Matt Domsch - * and Andrew Wilks September 2003 - * legacy CHS retreival by Patrick J. LoPresti - * March 2004 */ #include @@ -61,7 +52,6 @@ #include #include #include -#include #include /* Signature words to ensure LILO loaded us right */ @@ -166,7 +156,7 @@ cmd_line_ptr: .long 0 # (Header versio # can be located anywhere in # low memory 0x10000 or higher. -ramdisk_max: .long MAXMEM-1 # (Header version 0x0203 or later) +ramdisk_max: .long __MAXMEM-1 # (Header version 0x0203 or later) # The highest safe address for # the contents of an initrd @@ -581,120 +571,7 @@ no_32_apm_bios: done_apm_bios: #endif -#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) -# Read the first sector of device 80h and store the 4-byte signature - movl $0xFFFFFFFF, %eax - movl %eax, (DISK80_SIG_BUFFER) # assume failure - movb $READ_SECTORS, %ah - movb $1, %al # read 1 sector - movb $0x80, %dl # from device 80 - movb $0, %dh # at head 0 - movw $1, %cx # cylinder 0, sector 0 - pushw %es - pushw %ds - popw %es - movw $EDDBUF, %bx - pushw %dx # work around buggy BIOSes - stc # work around buggy BIOSes - int $0x13 - sti # work around buggy BIOSes - popw %dx - jc disk_sig_done - movl (EDDBUF+MBR_SIG_OFFSET), %eax - movl %eax, (DISK80_SIG_BUFFER) # store success -disk_sig_done: - popw %es - -# Do the BIOS Enhanced Disk Drive calls -# This consists of two calls: -# int 13h ah=41h "Check Extensions Present" -# int 13h ah=48h "Get Device Parameters" -# int 13h ah=08h "Legacy Get Device Parameters" -# -# A buffer of size EDDMAXNR*(EDDEXTSIZE+EDDPARMSIZE) is reserved for our use -# in the empty_zero_page at EDDBUF. The first four bytes of which are -# used to store the device number, interface support map and version -# results from fn41. The next four bytes are used to store the legacy -# cylinders, heads, and sectors from fn08. The following 74 bytes are used to -# store the results from fn48. Starting from device 80h, fn41, then fn48 -# are called and their results stored in EDDBUF+n*(EDDEXTSIZE+EDDPARMIZE). -# Then the pointer is incremented to store the data for the next call. -# This repeats until either a device doesn't exist, or until EDDMAXNR -# devices have been stored. -# The one tricky part is that ds:si always points EDDEXTSIZE bytes into -# the structure, and the fn41 and fn08 results are stored at offsets -# from there. This removes the need to increment the pointer for -# every store, and leaves it ready for the fn48 call. -# A second one-byte buffer, EDDNR, in the empty_zero_page stores -# the number of BIOS devices which exist, up to EDDMAXNR. -# In setup.c, copy_edd() stores both empty_zero_page buffers away -# for later use, as they would get overwritten otherwise. -# This code is sensitive to the size of the structs in edd.h -edd_start: - # %ds points to the bootsector - # result buffer for fn48 - movw $EDDBUF+EDDEXTSIZE, %si # in ds:si, fn41 results - # kept just before that - movb $0, (EDDNR) # zero value at EDDNR - movb $0x80, %dl # BIOS device 0x80 - -edd_check_ext: - movb $CHECKEXTENSIONSPRESENT, %ah # Function 41 - movw $EDDMAGIC1, %bx # magic - int $0x13 # make the call - jc edd_done # no more BIOS devices - - cmpw $EDDMAGIC2, %bx # is magic right? - jne edd_next # nope, next... - - movb %dl, %ds:-8(%si) # store device number - movb %ah, %ds:-7(%si) # store version - movw %cx, %ds:-6(%si) # store extensions - incb (EDDNR) # note that we stored something - -edd_get_device_params: - movw $EDDPARMSIZE, %ds:(%si) # put size - movw $0x0, %ds:2(%si) # work around buggy BIOSes - movb $GETDEVICEPARAMETERS, %ah # Function 48 - int $0x13 # make the call - # Don't check for fail return - # it doesn't matter. -edd_get_legacy_chs: - xorw %ax, %ax - movw %ax, %ds:-4(%si) - movw %ax, %ds:-2(%si) - # Ralf Brown's Interrupt List says to set ES:DI to - # 0000h:0000h "to guard against BIOS bugs" - pushw %es - movw %ax, %es - movw %ax, %di - pushw %dx # legacy call clobbers %dl - movb $LEGACYGETDEVICEPARAMETERS, %ah # Function 08 - int $0x13 # make the call - jc edd_legacy_done # failed - movb %cl, %al # Low 6 bits are max - andb $0x3F, %al # sector number - movb %al, %ds:-1(%si) # Record max sect - movb %dh, %ds:-2(%si) # Record max head number - movb %ch, %al # Low 8 bits of max cyl - shr $6, %cl - movb %cl, %ah # High 2 bits of max cyl - movw %ax, %ds:-4(%si) - -edd_legacy_done: - popw %dx - popw %es - movw %si, %ax # increment si - addw $EDDPARMSIZE+EDDEXTSIZE, %ax - movw %ax, %si - -edd_next: - incb %dl # increment to next device - cmpb $EDDMAXNR, (EDDNR) # Out of space? - jb edd_check_ext # keep looping - -edd_done: -#endif +#include "edd.S" # Now we want to move to protected mode ... cmpw $0, %cs:realmode_swtch --- linux-2.6.5-rc1/arch/i386/defconfig 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/defconfig 2004-03-17 19:29:21.049745200 -0800 @@ -66,7 +66,6 @@ CONFIG_MPENTIUM4=y # CONFIG_MK6 is not set # CONFIG_MK7 is not set # CONFIG_MK8 is not set -# CONFIG_MELAN is not set # CONFIG_MCRUSOE is not set # CONFIG_MWINCHIPC6 is not set # CONFIG_MWINCHIP2 is not set @@ -141,7 +140,6 @@ CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y CONFIG_ACPI_PCI=y CONFIG_ACPI_SYSTEM=y -# CONFIG_ACPI_RELAXED_AML is not set # CONFIG_X86_PM_TIMER is not set # --- linux-2.6.5-rc1/arch/i386/Kconfig 2004-03-15 22:21:04.000000000 -0800 +++ 25/arch/i386/Kconfig 2004-03-17 19:31:24.654954352 -0800 @@ -418,6 +418,54 @@ config X86_OOSTORE depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR default y +config X86_4G + bool "4 GB kernel-space and 4 GB user-space virtual memory support" + help + This option is only useful for systems that have more than 1 GB + of RAM. + + The default kernel VM layout leaves 1 GB of virtual memory for + kernel-space mappings, and 3 GB of VM for user-space applications. + This option ups both the kernel-space VM and the user-space VM to + 4 GB. + + The cost of this option is additional TLB flushes done at + system-entry points that transition from user-mode into kernel-mode. + I.e. system calls and page faults, and IRQs that interrupt user-mode + code. There's also additional overhead to kernel operations that copy + memory to/from user-space. The overhead from this is hard to tell and + depends on the workload - it can be anything from no visible overhead + to 20-30% overhead. A good rule of thumb is to count with a runtime + overhead of 20%. + + The upside is the much increased kernel-space VM, which more than + quadruples the maximum amount of RAM supported. Kernels compiled with + this option boot on 64GB of RAM and still have more than 3.1 GB of + 'lowmem' left. Another bonus is that highmem IO bouncing decreases, + if used with drivers that still use bounce-buffers. + + There's also a 33% increase in user-space VM size - database + applications might see a boost from this. + + But the cost of the TLB flushes and the runtime overhead has to be + weighed against the bonuses offered by the larger VM spaces. The + dividing line depends on the actual workload - there might be 4 GB + systems that benefit from this option. Systems with less than 4 GB + of RAM will rarely see a benefit from this option - but it's not + out of question, the exact circumstances have to be considered. + +config X86_SWITCH_PAGETABLES + def_bool X86_4G + +config X86_4G_VM_LAYOUT + def_bool X86_4G + +config X86_UACCESS_INDIRECT + def_bool X86_4G + +config X86_HIGH_ENTRY + def_bool X86_4G + config HPET_TIMER bool "HPET Timer Support" help @@ -454,8 +502,8 @@ config SMP Y to "Enhanced Real Time Clock Support", below. The "Advanced Power Management" code will be disabled if you say Y here. - See also the , - , , + See also the , + , and the SMP-HOWTO available at . @@ -475,6 +523,16 @@ config NR_CPUS This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on SMP + default off + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with Intel Pentium 4 chips with HyperThreading at a + cost of slightly increased overhead in some places. If unsure say + N here. + config PREEMPT bool "Preemptible Kernel" help @@ -637,16 +695,7 @@ config X86_CPUID with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to /dev/cpu/31/cpuid. -config EDD - tristate "BIOS Enhanced Disk Drive calls determine boot disk (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - Say Y or M here if you want to enable BIOS Enhanced Disk Drive - Services real mode BIOS calls to determine which disk - BIOS tries boot from. This information is then exported via driverfs. - - This option is experimental, but believed to be safe, - and most disk controller BIOS vendors do not yet implement this feature. +source "drivers/firmware/Kconfig" choice prompt "High Memory Support" @@ -1055,9 +1104,6 @@ choice prompt "PCI access mode" depends on PCI && !X86_VISWS default PCI_GOANY - -config PCI_GOBIOS - bool "BIOS" ---help--- On PCI systems, the BIOS can be used to detect the PCI devices and determine their configuration. However, some old PCI motherboards @@ -1073,6 +1119,9 @@ config PCI_GOBIOS direct access method and falls back to the BIOS if that doesn't work. If unsure, go with the default, which is "Any". +config PCI_GOBIOS + bool "BIOS" + config PCI_GOMMCONFIG bool "MMConfig" @@ -1269,6 +1318,15 @@ config DEBUG_PAGEALLOC This results in a large slowdown, but helps to find certain types of memory corruptions. +config SPINLINE + bool "Spinlock inlining" + depends on DEBUG_KERNEL + help + This will change spinlocks from out of line to inline, making them + account cost to the callers in readprofile, rather than the lock + itself (as ".text.lock.filename"). This can be helpful for finding + the callers of locks. + config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM @@ -1285,20 +1343,211 @@ config DEBUG_INFO Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. +config LOCKMETER + bool "Kernel lock metering" + depends on SMP + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + config DEBUG_SPINLOCK_SLEEP bool "Sleep-inside-spinlock checking" help If you say Y here, various routines which may sleep will become very noisy if they are called with a spinlock held. +config KGDB + bool "Include kgdb kernel debugger" + depends on DEBUG_KERNEL + help + If you say Y here, the system will be compiled with the debug + option (-g) and a debugging stub will be included in the + kernel. This stub communicates with gdb on another (host) + computer via a serial port. The host computer should have + access to the kernel binary file (vmlinux) and a serial port + that is connected to the target machine. Gdb can be made to + configure the serial port or you can use stty and setserial to + do this. See the 'target' command in gdb. This option also + configures in the ability to request a breakpoint early in the + boot process. To request the breakpoint just include 'kgdb' + as a boot option when booting the target machine. The system + will then break as soon as it looks at the boot options. This + option also installs a breakpoint in panic and sends any + kernel faults to the debugger. For more information see the + Documentation/i386/kgdb/kgdb.txt file. + +choice + depends on KGDB + prompt "Debug serial port BAUD" + default KGDB_115200BAUD + help + Gdb and the kernel stub need to agree on the baud rate to be + used. Some systems (x86 family at this writing) allow this to + be configured. + +config KGDB_9600BAUD + bool "9600" + +config KGDB_19200BAUD + bool "19200" + +config KGDB_38400BAUD + bool "38400" + +config KGDB_57600BAUD + bool "57600" + +config KGDB_115200BAUD + bool "115200" +endchoice + +config KGDB_PORT + hex "hex I/O port address of the debug serial port" + depends on KGDB + default 3f8 + help + Some systems (x86 family at this writing) allow the port + address to be configured. The number entered is assumed to be + hex, don't put 0x in front of it. The standard address are: + COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx + will tell you what you have. It is good to test the serial + connection with a live system before trying to debug. + +config KGDB_IRQ + int "IRQ of the debug serial port" + depends on KGDB + default 4 + help + This is the irq for the debug port. If everything is working + correctly and the kernel has interrupts on a control C to the + port should cause a break into the kernel debug stub. + +config DEBUG_INFO + bool + depends on KGDB + default y + +config KGDB_MORE + bool "Add any additional compile options" + depends on KGDB + default n + help + Saying yes here turns on the ability to enter additional + compile options. + + +config KGDB_OPTIONS + depends on KGDB_MORE + string "Additional compile arguments" + default "-O1" + help + This option allows you enter additional compile options for + the whole kernel compile. Each platform will have a default + that seems right for it. For example on PPC "-ggdb -O1", and + for i386 "-O1". Note that by configuring KGDB "-g" is already + turned on. In addition, on i386 platforms + "-fomit-frame-pointer" is deleted from the standard compile + options. + +config NO_KGDB_CPUS + int "Number of CPUs" + depends on KGDB && SMP + default NR_CPUS + help + + This option sets the number of cpus for kgdb ONLY. It is used + to prune some internal structures so they look "nice" when + displayed with gdb. This is to overcome possibly larger + numbers that may have been entered above. Enter the real + number to get nice clean kgdb_info displays. + +config KGDB_TS + bool "Enable kgdb time stamp macros?" + depends on KGDB + default n + help + Kgdb event macros allow you to instrument your code with calls + to the kgdb event recording function. The event log may be + examined with gdb at a break point. Turning on this + capability also allows you to choose how many events to + keep. Kgdb always keeps the lastest events. + +choice + depends on KGDB_TS + prompt "Max number of time stamps to save?" + default KGDB_TS_128 + +config KGDB_TS_64 + bool "64" + +config KGDB_TS_128 + bool "128" + +config KGDB_TS_256 + bool "256" + +config KGDB_TS_512 + bool "512" + +config KGDB_TS_1024 + bool "1024" + +endchoice + +config STACK_OVERFLOW_TEST + bool "Turn on kernel stack overflow testing?" + depends on KGDB + default n + help + This option enables code in the front line interrupt handlers + to check for kernel stack overflow on interrupts and system + calls. This is part of the kgdb code on x86 systems. + +config KGDB_CONSOLE + bool "Enable serial console thru kgdb port" + depends on KGDB + default n + help + This option enables the command line "console=kgdb" option. + When the system is booted with this option in the command line + all kernel printk output is sent to gdb (as well as to other + consoles). For this to work gdb must be connected. For this + reason, this command line option will generate a breakpoint if + gdb has not yet connected. After the gdb continue command is + given all pent up console output will be printed by gdb on the + host machine. Neither this option, nor KGDB require the + serial driver to be configured. + +config KGDB_SYSRQ + bool "Turn on SysRq 'G' command to do a break?" + depends on KGDB + default y + help + This option includes an option in the SysRq code that allows + you to enter SysRq G which generates a breakpoint to the KGDB + stub. This will work if the keyboard is alive and can + interrupt the system. Because of constraints on when the + serial port interrupt can be enabled, this code may allow you + to interrupt the system before the serial port control C is + available. Just say yes here. + config FRAME_POINTER bool "Compile the kernel with frame pointers" + default KGDB help If you say Y here the resulting kernel image will be slightly larger and slower, but it will give very useful debugging information. If you don't debug the kernel, you can say N, but we may not be able to solve problems without frame pointers. +config MAGIC_SYSRQ + bool + depends on KGDB_SYSRQ + default y + +config 4KSTACKS + def_bool y + config X86_FIND_SMP_CONFIG bool depends on X86_LOCAL_APIC || X86_VOYAGER --- linux-2.6.5-rc1/arch/i386/kernel/acpi/boot.c 2004-03-15 22:21:04.000000000 -0800 +++ 25/arch/i386/kernel/acpi/boot.c 2004-03-17 19:44:38.080335360 -0800 @@ -372,7 +372,7 @@ acpi_scan_rsdp ( * RSDP signature. */ for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) + if (strncmp((char *) __va(start + offset), "RSD PTR ", sig_len)) continue; return (start + offset); } --- linux-2.6.5-rc1/arch/i386/kernel/acpi/sleep.c 2003-08-08 22:55:10.000000000 -0700 +++ 25/arch/i386/kernel/acpi/sleep.c 2004-03-17 19:31:17.666016832 -0800 @@ -19,13 +19,29 @@ extern void zap_low_mappings(void); extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); -static void init_low_mapping(pgd_t *pgd, int pgd_limit) +static void map_low(pgd_t *pgd_base, unsigned long start, unsigned long end) { - int pgd_ofs = 0; - - while ((pgd_ofs < pgd_limit) && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) { - set_pgd(pgd, *(pgd+USER_PTRS_PER_PGD)); - pgd_ofs++, pgd++; + unsigned long vaddr; + pmd_t *pmd; + pgd_t *pgd; + int i, j; + + pgd = pgd_base; + + for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { + vaddr = i*PGDIR_SIZE; + if (end && (vaddr >= end)) + break; + pmd = pmd_offset(pgd, 0); + for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { + vaddr = i*PGDIR_SIZE + j*PMD_SIZE; + if (end && (vaddr >= end)) + break; + if (vaddr < start) + continue; + set_pmd(pmd, __pmd(_KERNPG_TABLE + _PAGE_PSE + + vaddr - start)); + } } } @@ -39,7 +55,9 @@ int acpi_save_state_mem (void) { if (!acpi_wakeup_address) return 1; - init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD); + if (!cpu_has_pse) + return 1; + map_low(swapper_pg_dir, 0, LOW_MAPPINGS_SIZE); memcpy((void *) acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start); acpi_copy_wakeup_routine(acpi_wakeup_address); --- linux-2.6.5-rc1/arch/i386/kernel/apic.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/apic.c 2004-03-17 19:30:24.698069184 -0800 @@ -1182,9 +1182,6 @@ int __init APIC_init_uniprocessor (void) phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); setup_local_APIC(); - - if (nmi_watchdog == NMI_LOCAL_APIC) - check_nmi_watchdog(); #ifdef CONFIG_X86_IO_APIC if (smp_found_config) if (!skip_ioapic_setup && nr_ioapics) --- linux-2.6.5-rc1/arch/i386/kernel/asm-offsets.c 2004-03-15 22:21:04.000000000 -0800 +++ 25/arch/i386/kernel/asm-offsets.c 2004-03-17 19:31:17.666016832 -0800 @@ -31,5 +31,19 @@ void foo(void) DEFINE(RT_SIGFRAME_sigcontext, offsetof (struct rt_sigframe, uc.uc_mcontext)); + DEFINE(TI_task, offsetof (struct thread_info, task)); + DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain)); + DEFINE(TI_flags, offsetof (struct thread_info, flags)); + DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count)); + DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit)); + DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack)); + DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack)); + DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd)); + + DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr, + __fix_to_virt(FIX_ENTRY_TRAMPOLINE_0)); + DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL)); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(task_thread_db7, + offsetof (struct task_struct, thread.debugreg[7])); } --- linux-2.6.5-rc1/arch/i386/kernel/cpu/common.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/cpu/common.c 2004-03-17 19:31:17.667016680 -0800 @@ -514,12 +514,16 @@ void __init cpu_init (void) set_tss_desc(cpu,t); cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); - load_LDT(&init_mm.context); + if (cpu) + load_LDT(&init_mm.context); /* Set up doublefault TSS pointer in the GDT */ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff; + if (cpu) + trap_init_virtual_GDT(); + /* Clear %fs and %gs. */ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); --- linux-2.6.5-rc1/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-03-17 19:30:05.360009016 -0800 @@ -57,8 +57,7 @@ static int cpufreq_p4_setdc(unsigned int u32 l, h; cpumask_t cpus_allowed, affected_cpu_map; struct cpufreq_freqs freqs; - int hyperthreading = 0; - int sibling = 0; + int j; if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) @@ -68,13 +67,10 @@ static int cpufreq_p4_setdc(unsigned int cpus_allowed = current->cpus_allowed; /* only run on CPU to be set, or on its sibling */ - affected_cpu_map = cpumask_of_cpu(cpu); -#ifdef CONFIG_X86_HT - hyperthreading = ((cpu_has_ht) && (smp_num_siblings == 2)); - if (hyperthreading) { - sibling = cpu_sibling_map[cpu]; - cpu_set(sibling, affected_cpu_map); - } +#ifdef CONFIG_SMP + affected_cpu_map = cpu_sibling_map[cpu]; +#else + affected_cpu_map = cpumask_of_cpu(cpu); #endif set_cpus_allowed(current, affected_cpu_map); BUG_ON(!cpu_isset(smp_processor_id(), affected_cpu_map)); @@ -97,11 +93,11 @@ static int cpufreq_p4_setdc(unsigned int /* notifiers */ freqs.old = stock_freq * l / 8; freqs.new = stock_freq * newstate / 8; - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - if (hyperthreading) { - freqs.cpu = sibling; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + for_each_cpu(j) { + if (cpu_isset(j, affected_cpu_map)) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } } rdmsr(MSR_IA32_THERM_STATUS, l, h); @@ -132,10 +128,11 @@ static int cpufreq_p4_setdc(unsigned int set_cpus_allowed(current, cpus_allowed); /* notifiers */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - if (hyperthreading) { - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + for_each_cpu(j) { + if (cpu_isset(j, affected_cpu_map)) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } } return 0; --- linux-2.6.5-rc1/arch/i386/kernel/cpu/intel.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/cpu/intel.c 2004-03-17 19:31:17.667016680 -0800 @@ -10,6 +10,7 @@ #include #include #include +#include #include "cpu.h" @@ -19,8 +20,6 @@ #include #endif -extern int trap_init_f00f_bug(void); - #ifdef CONFIG_X86_INTEL_USERCOPY /* * Alignment at which movsl is preferred for bulk memory copies. @@ -165,7 +164,7 @@ static void __init init_intel(struct cpu c->f00f_bug = 1; if ( !f00f_workaround_enabled ) { - trap_init_f00f_bug(); + trap_init_virtual_IDT(); printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); f00f_workaround_enabled = 1; } @@ -250,6 +249,12 @@ static void __init init_intel(struct cpu /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) clear_bit(X86_FEATURE_SEP, c->x86_capability); + /* + * FIXME: SEP is disabled for 4G/4G for now: + */ +#ifdef CONFIG_X86_HIGH_ENTRY + clear_bit(X86_FEATURE_SEP, c->x86_capability); +#endif /* Names for the Pentium II/Celeron processors detectable only by also checking the cache size. --- linux-2.6.5-rc1/arch/i386/kernel/dmi_scan.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/kernel/dmi_scan.c 2004-03-17 19:29:21.049745200 -0800 @@ -525,7 +525,7 @@ static __init int print_if_true(struct d #ifdef CONFIG_ACPI_BOOT extern int acpi_disabled, acpi_force; -static __init __attribute__((unused)) int acpi_disable(struct dmi_blacklist *d) +static __init __attribute__((unused)) int disable_acpi(struct dmi_blacklist *d) { if (!acpi_force) { printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); @@ -933,7 +933,7 @@ static __initdata struct dmi_blacklist d * Boxes that need ACPI disabled */ - { acpi_disable, "IBM Thinkpad", { + { disable_acpi, "IBM Thinkpad", { MATCH(DMI_BOARD_VENDOR, "IBM"), MATCH(DMI_BOARD_NAME, "2629H1G"), NO_MATCH, NO_MATCH }}, --- linux-2.6.5-rc1/arch/i386/kernel/doublefault.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/doublefault.c 2004-03-17 19:31:17.668016528 -0800 @@ -7,12 +7,13 @@ #include #include #include +#include #define DOUBLEFAULT_STACKSIZE (1024) static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) -#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000) +#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START)) static void doublefault_fn(void) { @@ -38,8 +39,8 @@ static void doublefault_fn(void) printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", t->eax, t->ebx, t->ecx, t->edx); - printk("esi = %08lx, edi = %08lx\n", - t->esi, t->edi); + printk("esi = %08lx, edi = %08lx, ebp = %08lx\n", + t->esi, t->edi, t->ebp); } } --- linux-2.6.5-rc1/arch/i386/kernel/edd.c 2004-03-15 22:21:04.000000000 -0800 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,837 +0,0 @@ -/* - * linux/arch/i386/kernel/edd.c - * Copyright (C) 2002, 2003, 2004 Dell Inc. - * by Matt Domsch - * disk80 signature by Matt Domsch, Andrew Wilks, and Sandeep K. Shandilya - * legacy CHS by Patrick J. LoPresti - * - * BIOS Enhanced Disk Drive Services (EDD) - * conformant to T13 Committee www.t13.org - * projects 1572D, 1484D, 1386D, 1226DT - * - * This code takes information provided by BIOS EDD calls - * fn41 - Check Extensions Present and - * fn48 - Get Device Parametes with EDD extensions - * made in setup.S, copied to safe structures in setup.c, - * and presents it in sysfs. - * - * Please see http://domsch.com/linux/edd30/results.html for - * the list of BIOSs which have been reported to implement EDD. - * If you don't see yours listed, please send a report as described there. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License v2.0 as published by - * the Free Software Foundation - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -/* - * Known issues: - * - refcounting of struct device objects could be improved. - * - * TODO: - * - Add IDE and USB disk device support - * - move edd.[ch] to better locations if/when one is decided - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -/* FIXME - this really belongs in include/scsi/scsi.h */ -#include <../drivers/scsi/scsi.h> -#include <../drivers/scsi/hosts.h> - -MODULE_AUTHOR("Matt Domsch "); -MODULE_DESCRIPTION("sysfs interface to BIOS EDD information"); -MODULE_LICENSE("GPL"); - -#define EDD_VERSION "0.13 2004-Mar-09" -#define EDD_DEVICE_NAME_SIZE 16 -#define REPORT_URL "http://linux.dell.com/edd/results.html" - -#define left (PAGE_SIZE - (p - buf) - 1) - -struct edd_device { - struct edd_info *info; - struct kobject kobj; -}; - -struct edd_attribute { - struct attribute attr; - ssize_t(*show) (struct edd_device * edev, char *buf); - int (*test) (struct edd_device * edev); -}; - -/* forward declarations */ -static int edd_dev_is_type(struct edd_device *edev, const char *type); -static struct pci_dev *edd_get_pci_dev(struct edd_device *edev); - -static struct edd_device *edd_devices[EDDMAXNR]; - -#define EDD_DEVICE_ATTR(_name,_mode,_show,_test) \ -struct edd_attribute edd_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .test = _test, \ -}; - -static inline struct edd_info * -edd_dev_get_info(struct edd_device *edev) -{ - return edev->info; -} - -static inline void -edd_dev_set_info(struct edd_device *edev, struct edd_info *info) -{ - edev->info = info; -} - -#define to_edd_attr(_attr) container_of(_attr,struct edd_attribute,attr) -#define to_edd_device(obj) container_of(obj,struct edd_device,kobj) - -static ssize_t -edd_attr_show(struct kobject * kobj, struct attribute *attr, char *buf) -{ - struct edd_device *dev = to_edd_device(kobj); - struct edd_attribute *edd_attr = to_edd_attr(attr); - ssize_t ret = 0; - - if (edd_attr->show) - ret = edd_attr->show(dev, buf); - return ret; -} - -static struct sysfs_ops edd_attr_ops = { - .show = edd_attr_show, -}; - -static ssize_t -edd_show_host_bus(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - char *p = buf; - int i; - - if (!edev || !info || !buf) { - return -EINVAL; - } - - for (i = 0; i < 4; i++) { - if (isprint(info->params.host_bus_type[i])) { - p += scnprintf(p, left, "%c", info->params.host_bus_type[i]); - } else { - p += scnprintf(p, left, " "); - } - } - - if (!strncmp(info->params.host_bus_type, "ISA", 3)) { - p += scnprintf(p, left, "\tbase_address: %x\n", - info->params.interface_path.isa.base_address); - } else if (!strncmp(info->params.host_bus_type, "PCIX", 4) || - !strncmp(info->params.host_bus_type, "PCI", 3)) { - p += scnprintf(p, left, - "\t%02x:%02x.%d channel: %u\n", - info->params.interface_path.pci.bus, - info->params.interface_path.pci.slot, - info->params.interface_path.pci.function, - info->params.interface_path.pci.channel); - } else if (!strncmp(info->params.host_bus_type, "IBND", 4) || - !strncmp(info->params.host_bus_type, "XPRS", 4) || - !strncmp(info->params.host_bus_type, "HTPT", 4)) { - p += scnprintf(p, left, - "\tTBD: %llx\n", - info->params.interface_path.ibnd.reserved); - - } else { - p += scnprintf(p, left, "\tunknown: %llx\n", - info->params.interface_path.unknown.reserved); - } - return (p - buf); -} - -static ssize_t -edd_show_interface(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - char *p = buf; - int i; - - if (!edev || !info || !buf) { - return -EINVAL; - } - - for (i = 0; i < 8; i++) { - if (isprint(info->params.interface_type[i])) { - p += scnprintf(p, left, "%c", info->params.interface_type[i]); - } else { - p += scnprintf(p, left, " "); - } - } - if (!strncmp(info->params.interface_type, "ATAPI", 5)) { - p += scnprintf(p, left, "\tdevice: %u lun: %u\n", - info->params.device_path.atapi.device, - info->params.device_path.atapi.lun); - } else if (!strncmp(info->params.interface_type, "ATA", 3)) { - p += scnprintf(p, left, "\tdevice: %u\n", - info->params.device_path.ata.device); - } else if (!strncmp(info->params.interface_type, "SCSI", 4)) { - p += scnprintf(p, left, "\tid: %u lun: %llu\n", - info->params.device_path.scsi.id, - info->params.device_path.scsi.lun); - } else if (!strncmp(info->params.interface_type, "USB", 3)) { - p += scnprintf(p, left, "\tserial_number: %llx\n", - info->params.device_path.usb.serial_number); - } else if (!strncmp(info->params.interface_type, "1394", 4)) { - p += scnprintf(p, left, "\teui: %llx\n", - info->params.device_path.i1394.eui); - } else if (!strncmp(info->params.interface_type, "FIBRE", 5)) { - p += scnprintf(p, left, "\twwid: %llx lun: %llx\n", - info->params.device_path.fibre.wwid, - info->params.device_path.fibre.lun); - } else if (!strncmp(info->params.interface_type, "I2O", 3)) { - p += scnprintf(p, left, "\tidentity_tag: %llx\n", - info->params.device_path.i2o.identity_tag); - } else if (!strncmp(info->params.interface_type, "RAID", 4)) { - p += scnprintf(p, left, "\tidentity_tag: %x\n", - info->params.device_path.raid.array_number); - } else if (!strncmp(info->params.interface_type, "SATA", 4)) { - p += scnprintf(p, left, "\tdevice: %u\n", - info->params.device_path.sata.device); - } else { - p += scnprintf(p, left, "\tunknown: %llx %llx\n", - info->params.device_path.unknown.reserved1, - info->params.device_path.unknown.reserved2); - } - - return (p - buf); -} - -/** - * edd_show_raw_data() - copies raw data to buffer for userspace to parse - * - * Returns: number of bytes written, or -EINVAL on failure - */ -static ssize_t -edd_show_raw_data(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - ssize_t len = sizeof (info->params); - if (!edev || !info || !buf) { - return -EINVAL; - } - - if (!(info->params.key == 0xBEDD || info->params.key == 0xDDBE)) - len = info->params.length; - - /* In case of buggy BIOSs */ - if (len > (sizeof(info->params))) - len = sizeof(info->params); - - memcpy(buf, &info->params, len); - return len; -} - -static ssize_t -edd_show_version(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - char *p = buf; - if (!edev || !info || !buf) { - return -EINVAL; - } - - p += scnprintf(p, left, "0x%02x\n", info->version); - return (p - buf); -} - -static ssize_t -edd_show_disk80_sig(struct edd_device *edev, char *buf) -{ - char *p = buf; - p += scnprintf(p, left, "0x%08x\n", edd_disk80_sig); - return (p - buf); -} - -static ssize_t -edd_show_extensions(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - char *p = buf; - if (!edev || !info || !buf) { - return -EINVAL; - } - - if (info->interface_support & EDD_EXT_FIXED_DISK_ACCESS) { - p += scnprintf(p, left, "Fixed disk access\n"); - } - if (info->interface_support & EDD_EXT_DEVICE_LOCKING_AND_EJECTING) { - p += scnprintf(p, left, "Device locking and ejecting\n"); - } - if (info->interface_support & EDD_EXT_ENHANCED_DISK_DRIVE_SUPPORT) { - p += scnprintf(p, left, "Enhanced Disk Drive support\n"); - } - if (info->interface_support & EDD_EXT_64BIT_EXTENSIONS) { - p += scnprintf(p, left, "64-bit extensions\n"); - } - return (p - buf); -} - -static ssize_t -edd_show_info_flags(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - char *p = buf; - if (!edev || !info || !buf) { - return -EINVAL; - } - - if (info->params.info_flags & EDD_INFO_DMA_BOUNDARY_ERROR_TRANSPARENT) - p += scnprintf(p, left, "DMA boundary error transparent\n"); - if (info->params.info_flags & EDD_INFO_GEOMETRY_VALID) - p += scnprintf(p, left, "geometry valid\n"); - if (info->params.info_flags & EDD_INFO_REMOVABLE) - p += scnprintf(p, left, "removable\n"); - if (info->params.info_flags & EDD_INFO_WRITE_VERIFY) - p += scnprintf(p, left, "write verify\n"); - if (info->params.info_flags & EDD_INFO_MEDIA_CHANGE_NOTIFICATION) - p += scnprintf(p, left, "media change notification\n"); - if (info->params.info_flags & EDD_INFO_LOCKABLE) - p += scnprintf(p, left, "lockable\n"); - if (info->params.info_flags & EDD_INFO_NO_MEDIA_PRESENT) - p += scnprintf(p, left, "no media present\n"); - if (info->params.info_flags & EDD_INFO_USE_INT13_FN50) - p += scnprintf(p, left, "use int13 fn50\n"); - return (p - buf); -} - -static ssize_t -edd_show_legacy_cylinders(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - char *p = buf; - if (!edev || !info || !buf) { - return -EINVAL; - } - - p += snprintf(p, left, "0x%x\n", info->legacy_cylinders); - return (p - buf); -} - -static ssize_t -edd_show_legacy_heads(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - char *p = buf; - if (!edev || !info || !buf) { - return -EINVAL; - } - - p += snprintf(p, left, "0x%x\n", info->legacy_heads); - return (p - buf); -} - -static ssize_t -edd_show_legacy_sectors(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - char *p = buf; - if (!edev || !info || !buf) { - return -EINVAL; - } - - p += snprintf(p, left, "0x%x\n", info->legacy_sectors); - return (p - buf); -} - -static ssize_t -edd_show_default_cylinders(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - char *p = buf; - if (!edev || !info || !buf) { - return -EINVAL; - } - - p += scnprintf(p, left, "0x%x\n", info->params.num_default_cylinders); - return (p - buf); -} - -static ssize_t -edd_show_default_heads(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - char *p = buf; - if (!edev || !info || !buf) { - return -EINVAL; - } - - p += scnprintf(p, left, "0x%x\n", info->params.num_default_heads); - return (p - buf); -} - -static ssize_t -edd_show_default_sectors_per_track(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - char *p = buf; - if (!edev || !info || !buf) { - return -EINVAL; - } - - p += scnprintf(p, left, "0x%x\n", info->params.sectors_per_track); - return (p - buf); -} - -static ssize_t -edd_show_sectors(struct edd_device *edev, char *buf) -{ - struct edd_info *info = edd_dev_get_info(edev); - char *p = buf; - if (!edev || !info || !buf) { - return -EINVAL; - } - - p += scnprintf(p, left, "0x%llx\n", info->params.number_of_sectors); - return (p - buf); -} - - -/* - * Some device instances may not have all the above attributes, - * or the attribute values may be meaningless (i.e. if - * the device is < EDD 3.0, it won't have host_bus and interface - * information), so don't bother making files for them. Likewise - * if the default_{cylinders,heads,sectors_per_track} values - * are zero, the BIOS doesn't provide sane values, don't bother - * creating files for them either. - */ - -static int -edd_has_legacy_cylinders(struct edd_device *edev) -{ - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) - return -EINVAL; - return info->legacy_cylinders > 0; -} - -static int -edd_has_legacy_heads(struct edd_device *edev) -{ - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) - return -EINVAL; - return info->legacy_heads > 0; -} - -static int -edd_has_legacy_sectors(struct edd_device *edev) -{ - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) - return -EINVAL; - return info->legacy_sectors > 0; -} - -static int -edd_has_default_cylinders(struct edd_device *edev) -{ - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) - return -EINVAL; - return info->params.num_default_cylinders > 0; -} - -static int -edd_has_default_heads(struct edd_device *edev) -{ - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) - return -EINVAL; - return info->params.num_default_heads > 0; -} - -static int -edd_has_default_sectors_per_track(struct edd_device *edev) -{ - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) - return -EINVAL; - return info->params.sectors_per_track > 0; -} - -static int -edd_has_edd30(struct edd_device *edev) -{ - struct edd_info *info = edd_dev_get_info(edev); - int i, nonzero_path = 0; - char c; - - if (!edev || !info) - return 0; - - if (!(info->params.key == 0xBEDD || info->params.key == 0xDDBE)) { - return 0; - } - - for (i = 30; i <= 73; i++) { - c = *(((uint8_t *) info) + i + 4); - if (c) { - nonzero_path++; - break; - } - } - if (!nonzero_path) { - return 0; - } - - return 1; -} - -static int -edd_has_disk80_sig(struct edd_device *edev) -{ - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) - return 0; - return info->device == 0x80; -} - -static EDD_DEVICE_ATTR(raw_data, 0444, edd_show_raw_data, NULL); -static EDD_DEVICE_ATTR(version, 0444, edd_show_version, NULL); -static EDD_DEVICE_ATTR(extensions, 0444, edd_show_extensions, NULL); -static EDD_DEVICE_ATTR(info_flags, 0444, edd_show_info_flags, NULL); -static EDD_DEVICE_ATTR(sectors, 0444, edd_show_sectors, NULL); -static EDD_DEVICE_ATTR(legacy_cylinders, 0444, edd_show_legacy_cylinders, - edd_has_legacy_cylinders); -static EDD_DEVICE_ATTR(legacy_heads, 0444, edd_show_legacy_heads, - edd_has_legacy_heads); -static EDD_DEVICE_ATTR(legacy_sectors, 0444, edd_show_legacy_sectors, - edd_has_legacy_sectors); -static EDD_DEVICE_ATTR(default_cylinders, 0444, edd_show_default_cylinders, - edd_has_default_cylinders); -static EDD_DEVICE_ATTR(default_heads, 0444, edd_show_default_heads, - edd_has_default_heads); -static EDD_DEVICE_ATTR(default_sectors_per_track, 0444, - edd_show_default_sectors_per_track, - edd_has_default_sectors_per_track); -static EDD_DEVICE_ATTR(interface, 0444, edd_show_interface, edd_has_edd30); -static EDD_DEVICE_ATTR(host_bus, 0444, edd_show_host_bus, edd_has_edd30); -static EDD_DEVICE_ATTR(mbr_signature, 0444, edd_show_disk80_sig, edd_has_disk80_sig); - - -/* These are default attributes that are added for every edd - * device discovered. - */ -static struct attribute * def_attrs[] = { - &edd_attr_raw_data.attr, - &edd_attr_version.attr, - &edd_attr_extensions.attr, - &edd_attr_info_flags.attr, - &edd_attr_sectors.attr, - NULL, -}; - -/* These attributes are conditional and only added for some devices. */ -static struct edd_attribute * edd_attrs[] = { - &edd_attr_legacy_cylinders, - &edd_attr_legacy_heads, - &edd_attr_legacy_sectors, - &edd_attr_default_cylinders, - &edd_attr_default_heads, - &edd_attr_default_sectors_per_track, - &edd_attr_interface, - &edd_attr_host_bus, - &edd_attr_mbr_signature, - NULL, -}; - -/** - * edd_release - free edd structure - * @kobj: kobject of edd structure - * - * This is called when the refcount of the edd structure - * reaches 0. This should happen right after we unregister, - * but just in case, we use the release callback anyway. - */ - -static void edd_release(struct kobject * kobj) -{ - struct edd_device * dev = to_edd_device(kobj); - kfree(dev); -} - -static struct kobj_type ktype_edd = { - .release = edd_release, - .sysfs_ops = &edd_attr_ops, - .default_attrs = def_attrs, -}; - -static decl_subsys(edd,&ktype_edd,NULL); - - -/** - * edd_dev_is_type() - is this EDD device a 'type' device? - * @edev - * @type - a host bus or interface identifier string per the EDD spec - * - * Returns 1 (TRUE) if it is a 'type' device, 0 otherwise. - */ -static int -edd_dev_is_type(struct edd_device *edev, const char *type) -{ - struct edd_info *info = edd_dev_get_info(edev); - - if (edev && type && info) { - if (!strncmp(info->params.host_bus_type, type, strlen(type)) || - !strncmp(info->params.interface_type, type, strlen(type))) - return 1; - } - return 0; -} - -/** - * edd_get_pci_dev() - finds pci_dev that matches edev - * @edev - edd_device - * - * Returns pci_dev if found, or NULL - */ -static struct pci_dev * -edd_get_pci_dev(struct edd_device *edev) -{ - struct edd_info *info = edd_dev_get_info(edev); - - if (edd_dev_is_type(edev, "PCI")) { - return pci_find_slot(info->params.interface_path.pci.bus, - PCI_DEVFN(info->params.interface_path.pci.slot, - info->params.interface_path.pci. - function)); - } - return NULL; -} - -static int -edd_create_symlink_to_pcidev(struct edd_device *edev) -{ - - struct pci_dev *pci_dev = edd_get_pci_dev(edev); - if (!pci_dev) - return 1; - return sysfs_create_link(&edev->kobj,&pci_dev->dev.kobj,"pci_dev"); -} - -/* - * FIXME - as of 15-Jan-2003, there are some non-"scsi_device"s on the - * scsi_bus list. The following functions could possibly mis-access - * memory in that case. This is actually a problem with the SCSI - * layer, which is being addressed there. Until then, don't use the - * SCSI functions. - */ - -#undef CONFIG_SCSI -#undef CONFIG_SCSI_MODULE -#if defined(CONFIG_SCSI) || defined(CONFIG_SCSI_MODULE) - -struct edd_match_data { - struct edd_device * edev; - struct scsi_device * sd; -}; - -/** - * edd_match_scsidev() - * @edev - EDD device is a known SCSI device - * @sd - scsi_device with host who's parent is a PCI controller - * - * returns 1 if a match is found, 0 if not. - */ -static int edd_match_scsidev(struct device * dev, void * d) -{ - struct edd_match_data * data = (struct edd_match_data *)d; - struct edd_info *info = edd_dev_get_info(data->edev); - struct scsi_device * sd = to_scsi_device(dev); - - if (info) { - if ((sd->channel == info->params.interface_path.pci.channel) && - (sd->id == info->params.device_path.scsi.id) && - (sd->lun == info->params.device_path.scsi.lun)) { - data->sd = sd; - return 1; - } - } - return 0; -} - -/** - * edd_find_matching_device() - * @edev - edd_device to match - * - * Search the SCSI devices for a drive that matches the EDD - * device descriptor we have. If we find a match, return it, - * otherwise, return NULL. - */ - -static struct scsi_device * -edd_find_matching_scsi_device(struct edd_device *edev) -{ - struct edd_match_data data; - struct bus_type * scsi_bus = find_bus("scsi"); - - if (!scsi_bus) { - return NULL; - } - - data.edev = edev; - - if (edd_dev_is_type(edev, "SCSI")) { - if (bus_for_each_dev(scsi_bus,NULL,&data,edd_match_scsidev)) - return data.sd; - } - return NULL; -} - -static int -edd_create_symlink_to_scsidev(struct edd_device *edev) -{ - struct pci_dev *pci_dev; - int rc = -EINVAL; - - pci_dev = edd_get_pci_dev(edev); - if (pci_dev) { - struct scsi_device * sdev = edd_find_matching_scsi_device(edev); - if (sdev && get_device(&sdev->sdev_driverfs_dev)) { - rc = sysfs_create_link(&edev->kobj, - &sdev->sdev_driverfs_dev.kobj, - "disc"); - put_device(&sdev->sdev_driverfs_dev); - } - } - return rc; -} - -#else -static int -edd_create_symlink_to_scsidev(struct edd_device *edev) -{ - return -ENOSYS; -} -#endif - - -static inline void -edd_device_unregister(struct edd_device *edev) -{ - kobject_unregister(&edev->kobj); -} - -static void edd_populate_dir(struct edd_device * edev) -{ - struct edd_attribute * attr; - int error = 0; - int i; - - for (i = 0; (attr = edd_attrs[i]) && !error; i++) { - if (!attr->test || - (attr->test && attr->test(edev))) - error = sysfs_create_file(&edev->kobj,&attr->attr); - } - - if (!error) { - edd_create_symlink_to_pcidev(edev); - edd_create_symlink_to_scsidev(edev); - } -} - -static int -edd_device_register(struct edd_device *edev, int i) -{ - int error; - - if (!edev) - return 1; - memset(edev, 0, sizeof (*edev)); - edd_dev_set_info(edev, &edd[i]); - snprintf(edev->kobj.name, EDD_DEVICE_NAME_SIZE, "int13_dev%02x", - edd[i].device); - kobj_set_kset_s(edev,edd_subsys); - error = kobject_register(&edev->kobj); - if (!error) - edd_populate_dir(edev); - return error; -} - -/** - * edd_init() - creates sysfs tree of EDD data - * - * This assumes that eddnr and edd were - * assigned in setup.c already. - */ -static int __init -edd_init(void) -{ - unsigned int i; - int rc=0; - struct edd_device *edev; - - printk(KERN_INFO "BIOS EDD facility v%s, %d devices found\n", - EDD_VERSION, eddnr); - printk(KERN_INFO "Please report your BIOS at %s\n", REPORT_URL); - - if (!eddnr) { - printk(KERN_INFO "EDD information not available.\n"); - return 1; - } - - rc = firmware_register(&edd_subsys); - if (rc) - return rc; - - for (i = 0; i < eddnr && i < EDDMAXNR && !rc; i++) { - edev = kmalloc(sizeof (*edev), GFP_KERNEL); - if (!edev) - return -ENOMEM; - - rc = edd_device_register(edev, i); - if (rc) { - kfree(edev); - break; - } - edd_devices[i] = edev; - } - - if (rc) - firmware_unregister(&edd_subsys); - return rc; -} - -static void __exit -edd_exit(void) -{ - int i; - struct edd_device *edev; - - for (i = 0; i < eddnr && i < EDDMAXNR; i++) { - if ((edev = edd_devices[i])) - edd_device_unregister(edev); - } - firmware_unregister(&edd_subsys); -} - -late_initcall(edd_init); -module_exit(edd_exit); --- linux-2.6.5-rc1/arch/i386/kernel/entry.S 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/entry.S 2004-03-17 19:31:17.670016224 -0800 @@ -43,11 +43,25 @@ #include #include #include +#include #include #include +#include #include #include #include "irq_vectors.h" + /* We do not recover from a stack overflow, but at least + * we know it happened and should be able to track it down. + */ +#ifdef CONFIG_STACK_OVERFLOW_TEST +#define STACK_OVERFLOW_TEST \ + testl $(THREAD_SIZE - 512),%esp; \ + jnz 10f; \ + call stack_overflow; \ +10: +#else +#define STACK_OVERFLOW_TEST +#endif #define nr_syscalls ((syscall_table_size)/4) @@ -87,7 +101,102 @@ TSS_ESP0_OFFSET = (4 - 0x200) #define resume_kernel restore_all #endif -#define SAVE_ALL \ +#ifdef CONFIG_X86_HIGH_ENTRY + +#ifdef CONFIG_X86_SWITCH_PAGETABLES + +#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +/* + * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu, + * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is + * left stale, so we must check whether to repeat the real stack calculation. + */ +#define repeat_if_esp_changed \ + xorl %esp, %ebp; \ + testl $-THREAD_SIZE, %ebp; \ + jnz 0b +#else +#define repeat_if_esp_changed +#endif + +/* clobbers ebx, edx and ebp */ + +#define __SWITCH_KERNELSPACE \ + cmpl $0xff000000, %esp; \ + jb 1f; \ + \ + /* \ + * switch pagetables and load the real stack, \ + * keep the stack offset: \ + */ \ + \ + movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \ + \ + /* GET_THREAD_INFO(%ebp) intermixed */ \ +0: \ + movl %esp, %ebp; \ + movl %esp, %ebx; \ + andl $(-THREAD_SIZE), %ebp; \ + andl $(THREAD_SIZE-1), %ebx; \ + orl TI_real_stack(%ebp), %ebx; \ + repeat_if_esp_changed; \ + \ + movl %edx, %cr3; \ + movl %ebx, %esp; \ +1: + +#endif + + +#define __SWITCH_USERSPACE \ + /* interrupted any of the user return paths? */ \ + \ + movl EIP(%esp), %eax; \ + \ + cmpl $int80_ret_start_marker, %eax; \ + jb 33f; /* nope - continue with sysexit check */\ + cmpl $int80_ret_end_marker, %eax; \ + jb 22f; /* yes - switch to virtual stack */ \ +33: \ + cmpl $sysexit_ret_start_marker, %eax; \ + jb 44f; /* nope - continue with user check */ \ + cmpl $sysexit_ret_end_marker, %eax; \ + jb 22f; /* yes - switch to virtual stack */ \ + /* return to userspace? */ \ +44: \ + movl EFLAGS(%esp),%ecx; \ + movb CS(%esp),%cl; \ + testl $(VM_MASK | 3),%ecx; \ + jz 2f; \ +22: \ + /* \ + * switch to the virtual stack, then switch to \ + * the userspace pagetables. \ + */ \ + \ + GET_THREAD_INFO(%ebp); \ + movl TI_virtual_stack(%ebp), %edx; \ + movl TI_user_pgd(%ebp), %ecx; \ + \ + movl %esp, %ebx; \ + andl $(THREAD_SIZE-1), %ebx; \ + orl %ebx, %edx; \ +int80_ret_start_marker: \ + movl %edx, %esp; \ + movl %ecx, %cr3; \ + \ + __RESTORE_ALL; \ +int80_ret_end_marker: \ +2: + +#else /* !CONFIG_X86_HIGH_ENTRY */ + +#define __SWITCH_KERNELSPACE +#define __SWITCH_USERSPACE + +#endif + +#define __SAVE_ALL \ cld; \ pushl %es; \ pushl %ds; \ @@ -102,7 +211,7 @@ TSS_ESP0_OFFSET = (4 - 0x200) movl %edx, %ds; \ movl %edx, %es; -#define RESTORE_INT_REGS \ +#define __RESTORE_INT_REGS \ popl %ebx; \ popl %ecx; \ popl %edx; \ @@ -111,29 +220,28 @@ TSS_ESP0_OFFSET = (4 - 0x200) popl %ebp; \ popl %eax -#define RESTORE_REGS \ - RESTORE_INT_REGS; \ -1: popl %ds; \ -2: popl %es; \ +#define __RESTORE_REGS \ + __RESTORE_INT_REGS; \ +111: popl %ds; \ +222: popl %es; \ .section .fixup,"ax"; \ -3: movl $0,(%esp); \ - jmp 1b; \ -4: movl $0,(%esp); \ - jmp 2b; \ +444: movl $0,(%esp); \ + jmp 111b; \ +555: movl $0,(%esp); \ + jmp 222b; \ .previous; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,3b; \ - .long 2b,4b; \ + .long 111b,444b;\ + .long 222b,555b;\ .previous - -#define RESTORE_ALL \ - RESTORE_REGS \ +#define __RESTORE_ALL \ + __RESTORE_REGS \ addl $4, %esp; \ -1: iret; \ +333: iret; \ .section .fixup,"ax"; \ -2: sti; \ +666: sti; \ movl $(__USER_DS), %edx; \ movl %edx, %ds; \ movl %edx, %es; \ @@ -142,10 +250,19 @@ TSS_ESP0_OFFSET = (4 - 0x200) .previous; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,2b; \ + .long 333b,666b;\ .previous +#define SAVE_ALL \ + __SAVE_ALL; \ + __SWITCH_KERNELSPACE; \ + STACK_OVERFLOW_TEST; + +#define RESTORE_ALL \ + __SWITCH_USERSPACE; \ + __RESTORE_ALL; +.section .entry.text,"ax" ENTRY(lcall7) pushfl # We get a different stack layout with call @@ -163,7 +280,7 @@ do_lcall: movl %edx,EIP(%ebp) # Now we move them to their "normal" places movl %ecx,CS(%ebp) # GET_THREAD_INFO_WITH_ESP(%ebp) # GET_THREAD_INFO - movl TI_EXEC_DOMAIN(%ebp), %edx # Get the execution domain + movl TI_exec_domain(%ebp), %edx # Get the execution domain call *4(%edx) # Call the lcall7 handler for the domain addl $4, %esp popl %eax @@ -208,7 +325,7 @@ ENTRY(resume_userspace) cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done on # int/exception return? jne work_pending @@ -216,18 +333,18 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - cmpl $0,TI_PRE_COUNT(%ebp) # non-zero preempt_count ? + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_all need_resched: - movl TI_FLAGS(%ebp), %ecx # need_resched set ? + movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl jz restore_all testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all - movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebp) + movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp) sti call schedule - movl $0,TI_PRE_COUNT(%ebp) + movl $0,TI_preempt_count(%ebp) cli jmp need_resched #endif @@ -246,38 +363,50 @@ sysenter_past_esp: pushl $(__USER_CS) pushl $SYSENTER_RETURN -/* - * Load the potential sixth argument from user stack. - * Careful about security. - */ - cmpl $__PAGE_OFFSET-3,%ebp - jae syscall_fault -1: movl (%ebp),%ebp -.section __ex_table,"a" - .align 4 - .long 1b,syscall_fault -.previous - pushl %eax SAVE_ALL GET_THREAD_INFO(%ebp) cmpl $(nr_syscalls), %eax jae syscall_badsys - testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) + testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) cli - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work + +#ifdef CONFIG_X86_SWITCH_PAGETABLES + + GET_THREAD_INFO(%ebp) + movl TI_virtual_stack(%ebp), %edx + movl TI_user_pgd(%ebp), %ecx + movl %esp, %ebx + andl $(THREAD_SIZE-1), %ebx + orl %ebx, %edx +sysexit_ret_start_marker: + movl %edx, %esp + movl %ecx, %cr3 +#endif + /* + * only ebx is not restored by the userspace sysenter vsyscall + * code, it assumes it to be callee-saved. + */ + movl EBX(%esp), %ebx + /* if something modifies registers it must also disable sysexit */ + movl EIP(%esp), %edx movl OLDESP(%esp), %ecx + sti sysexit - +#ifdef CONFIG_X86_SWITCH_PAGETABLES +sysexit_ret_end_marker: + nop +#endif # system call handler stub ENTRY(system_call) @@ -287,7 +416,7 @@ ENTRY(system_call) cmpl $(nr_syscalls), %eax jae syscall_badsys # system call tracing in operation - testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) + testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry syscall_call: call *sys_call_table(,%eax,4) @@ -296,10 +425,23 @@ syscall_exit: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work restore_all: +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS + movl EFLAGS(%esp), %eax # mix EFLAGS and CS + movb CS(%esp), %al + testl $(VM_MASK | 3), %eax + jz resume_kernelX # returning to kernel or vm86-space + + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? + jz resume_kernelX + + int $3 + +resume_kernelX: +#endif RESTORE_ALL # perform work that needs to be done immediately before resumption @@ -312,7 +454,7 @@ work_resched: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret - movl TI_FLAGS(%ebp), %ecx + movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? jz restore_all @@ -327,6 +469,22 @@ work_notifysig: # deal with pending s # vm86-space xorl %edx, %edx call do_notify_resume + +#if CONFIG_X86_HIGH_ENTRY + /* + * Reload db7 if necessary: + */ + movl TI_flags(%ebp), %ecx + testb $_TIF_DB7, %cl + jnz work_db7 + + jmp restore_all + +work_db7: + movl TI_task(%ebp), %edx; + movl task_thread_db7(%edx), %edx; + movl %edx, %db7; +#endif jmp restore_all ALIGN @@ -354,7 +512,7 @@ syscall_trace_entry: # perform syscall exit tracing ALIGN syscall_exit_work: - testb $_TIF_SYSCALL_TRACE, %cl + testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT), %cl jz work_pending sti # could let do_syscall_trace() call # schedule() instead @@ -382,7 +540,7 @@ syscall_badsys: */ .data ENTRY(interrupt) -.text +.previous vector=0 ENTRY(irq_entries_start) @@ -392,7 +550,7 @@ ENTRY(irq_entries_start) jmp common_interrupt .data .long 1b -.text +.previous vector=vector+1 .endr @@ -433,12 +591,17 @@ error_code: movl ES(%esp), %edi # get the function address movl %eax, ORIG_EAX(%esp) movl %ecx, ES(%esp) - movl %esp, %edx pushl %esi # push the error code - pushl %edx # push the pt_regs pointer movl $(__USER_DS), %edx movl %edx, %ds movl %edx, %es + +/* clobbers edx, ebx and ebp */ + __SWITCH_KERNELSPACE + + leal 4(%esp), %edx # prepare pt_regs + pushl %edx # push pt_regs + call *%edi addl $8, %esp jmp ret_from_exception @@ -529,7 +692,7 @@ nmi_stack_correct: pushl %edx call do_nmi addl $8, %esp - RESTORE_ALL + jmp restore_all nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) @@ -606,6 +769,8 @@ ENTRY(spurious_interrupt_bug) pushl $do_spurious_interrupt_bug jmp error_code +.previous + .data ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ @@ -865,7 +1030,7 @@ ENTRY(sys_call_table) .long sys_epoll_create .long sys_epoll_ctl /* 255 */ .long sys_epoll_wait - .long sys_remap_file_pages + .long old_remap_file_pages .long sys_set_tid_address .long sys_timer_create .long sys_timer_settime /* 260 */ @@ -882,5 +1047,12 @@ ENTRY(sys_call_table) .long sys_utimes .long sys_fadvise64_64 .long sys_ni_syscall /* sys_vserver */ + .long sys_mq_open + .long sys_mq_unlink /* 275 */ + .long sys_mq_timedsend + .long sys_mq_timedreceive + .long sys_mq_notify + .long sys_mq_getsetattr + .long sys_remap_file_pages /* 280 */ syscall_table_size=(.-sys_call_table) --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/arch/i386/kernel/entry_trampoline.c 2004-03-17 19:31:26.662649136 -0800 @@ -0,0 +1,75 @@ +/* + * linux/arch/i386/kernel/entry_trampoline.c + * + * (C) Copyright 2003 Ingo Molnar + * + * This file contains the needed support code for 4GB userspace + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text; + +void __init init_entry_mappings(void) +{ +#ifdef CONFIG_X86_HIGH_ENTRY + + void *tramp; + int p; + + /* + * We need a high IDT and GDT for the 4G/4G split: + */ + trap_init_virtual_IDT(); + + __set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL); + __set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL); + tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0); + + printk("mapped 4G/4G trampoline to %p.\n", tramp); + BUG_ON((void *)&__start___entry_text != tramp); + /* + * Virtual kernel stack: + */ + BUG_ON(__kmap_atomic_vaddr(KM_VSTACK_TOP) & (THREAD_SIZE-1)); + BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE); + BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE); + + /* + * set up the initial thread's virtual stack related + * fields: + */ + for (p = 0; p < ARRAY_SIZE(current->thread.stack_page); p++) + current->thread.stack_page[p] = virt_to_page((char *)current->thread_info + (p*PAGE_SIZE)); + + current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK_TOP); + + for (p = 0; p < ARRAY_SIZE(current->thread.stack_page); p++) { + __kunmap_atomic_type(KM_VSTACK_TOP-p); + __kmap_atomic(current->thread.stack_page[p], KM_VSTACK_TOP-p); + } +#endif + current->thread_info->real_stack = (void *)current->thread_info; + current->thread_info->user_pgd = NULL; + current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE; +} + + + +void __init entry_trampoline_setup(void) +{ + /* + * old IRQ entries set up by the boot code will still hang + * around - they are a sign of hw trouble anyway, now they'll + * produce a double fault message. + */ + trap_init_virtual_GDT(); +} --- linux-2.6.5-rc1/arch/i386/kernel/head.S 2004-03-15 22:21:04.000000000 -0800 +++ 25/arch/i386/kernel/head.S 2004-03-17 19:30:27.623624432 -0800 @@ -18,12 +18,7 @@ #include #include #include - -#define OLD_CL_MAGIC_ADDR 0x90020 -#define OLD_CL_MAGIC 0xA33F -#define OLD_CL_BASE_ADDR 0x90000 -#define OLD_CL_OFFSET 0x90022 -#define NEW_CL_POINTER 0x228 /* Relative to real mode data */ +#include /* * References to members of the new_cpu_data structure. @@ -195,22 +190,15 @@ ENTRY(startup_32_smp) call setup_idt /* - * Copy bootup parameters out of the way. First 2kB of - * _empty_zero_page is for boot parameters, second 2kB - * is for the command line. - * + * Copy bootup parameters out of the way. * Note: %esi still has the pointer to the real-mode data. */ - movl $empty_zero_page,%edi - movl $512,%ecx + movl $boot_params,%edi + movl $(PARAM_SIZE/4),%ecx cld rep movsl - xorl %eax,%eax - movl $512,%ecx - rep - stosl - movl empty_zero_page+NEW_CL_POINTER,%esi + movl boot_params+NEW_CL_POINTER,%esi andl %esi,%esi jnz 2f # New command line protocol cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR @@ -218,8 +206,8 @@ ENTRY(startup_32_smp) movzwl OLD_CL_OFFSET,%esi addl $(OLD_CL_BASE_ADDR),%esi 2: - movl $empty_zero_page+2048,%edi - movl $512,%ecx + movl $saved_command_line,%edi + movl $(COMMAND_LINE_SIZE/4),%ecx rep movsl 1: --- linux-2.6.5-rc1/arch/i386/kernel/i386_ksyms.c 2004-03-15 22:21:04.000000000 -0800 +++ 25/arch/i386/kernel/i386_ksyms.c 2004-03-17 19:31:17.671016072 -0800 @@ -93,7 +93,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter EXPORT_SYMBOL_NOVERS(__down_failed_trylock); EXPORT_SYMBOL_NOVERS(__up_wakeup); /* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy_generic); /* Delay loops */ EXPORT_SYMBOL(__ndelay); EXPORT_SYMBOL(__udelay); @@ -107,13 +106,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); +#if !defined(CONFIG_X86_UACCESS_INDIRECT) EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__direct_strncpy_from_user); EXPORT_SYMBOL(clear_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_from_user_ll); EXPORT_SYMBOL(__copy_to_user_ll); EXPORT_SYMBOL(strnlen_user); +#else /* CONFIG_X86_UACCESS_INDIRECT */ +EXPORT_SYMBOL(direct_csum_partial_copy_generic); +#endif EXPORT_SYMBOL(dma_alloc_coherent); EXPORT_SYMBOL(dma_free_coherent); @@ -176,10 +179,13 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback); #undef memcpy #undef memset +#undef memcmp extern void * memset(void *,int,__kernel_size_t); extern void * memcpy(void *,const void *,__kernel_size_t); +extern int memcmp(const void *,const void *,__kernel_size_t); EXPORT_SYMBOL_NOVERS(memcpy); EXPORT_SYMBOL_NOVERS(memset); +EXPORT_SYMBOL_NOVERS(memcmp); #ifdef CONFIG_HAVE_DEC_LOCK EXPORT_SYMBOL(atomic_dec_and_lock); --- linux-2.6.5-rc1/arch/i386/kernel/i387.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/i386/kernel/i387.c 2004-03-17 19:31:17.672015920 -0800 @@ -218,6 +218,7 @@ void set_fpu_mxcsr( struct task_struct * static int convert_fxsr_to_user( struct _fpstate __user *buf, struct i387_fxsave_struct *fxsave ) { + struct _fpreg tmp[8]; /* 80 bytes scratch area */ unsigned long env[7]; struct _fpreg __user *to; struct _fpxreg *from; @@ -234,23 +235,25 @@ static int convert_fxsr_to_user( struct if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) return 1; - to = &buf->_st[0]; + to = tmp; from = (struct _fpxreg *) &fxsave->st_space[0]; for ( i = 0 ; i < 8 ; i++, to++, from++ ) { unsigned long *t = (unsigned long *)to; unsigned long *f = (unsigned long *)from; - if (__put_user(*f, t) || - __put_user(*(f + 1), t + 1) || - __put_user(from->exponent, &to->exponent)) - return 1; + *t = *f; + *(t + 1) = *(f+1); + to->exponent = from->exponent; } + if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8]))) + return 1; return 0; } static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, struct _fpstate __user *buf ) { + struct _fpreg tmp[8]; /* 80 bytes scratch area */ unsigned long env[7]; struct _fpxreg *to; struct _fpreg __user *from; @@ -258,6 +261,8 @@ static int convert_fxsr_from_user( struc if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) return 1; + if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8]))) + return 1; fxsave->cwd = (unsigned short)(env[0] & 0xffff); fxsave->swd = (unsigned short)(env[1] & 0xffff); @@ -269,15 +274,14 @@ static int convert_fxsr_from_user( struc fxsave->fos = env[6]; to = (struct _fpxreg *) &fxsave->st_space[0]; - from = &buf->_st[0]; + from = tmp; for ( i = 0 ; i < 8 ; i++, to++, from++ ) { unsigned long *t = (unsigned long *)to; unsigned long *f = (unsigned long *)from; - if (__get_user(*t, f) || - __get_user(*(t + 1), f + 1) || - __get_user(to->exponent, &from->exponent)) - return 1; + *t = *f; + *(t + 1) = *(f + 1); + to->exponent = from->exponent; } return 0; } --- linux-2.6.5-rc1/arch/i386/kernel/i8259.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/i8259.c 2004-03-17 19:31:21.785390592 -0800 @@ -444,4 +444,7 @@ void __init init_IRQ(void) */ if (boot_cpu_data.hard_math && !cpu_has_fpu) setup_irq(FPU_IRQ, &fpu_irq); + + current_thread_info()->cpu = 0; + irq_ctx_init(0); } --- linux-2.6.5-rc1/arch/i386/kernel/init_task.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/init_task.c 2004-03-17 19:31:17.672015920 -0800 @@ -20,13 +20,13 @@ EXPORT_SYMBOL(init_mm); /* * Initial thread structure. * - * We need to make sure that this is 8192-byte aligned due to the + * We need to make sure that this is THREAD_SIZE aligned due to the * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ union thread_union init_thread_union __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; + { INIT_THREAD_INFO(init_task, init_thread_union) }; /* * Initial task structure. @@ -44,5 +44,5 @@ EXPORT_SYMBOL(init_task); * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; +struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS }; --- linux-2.6.5-rc1/arch/i386/kernel/io_apic.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/io_apic.c 2004-03-17 19:30:05.362008712 -0800 @@ -317,8 +317,7 @@ struct irq_cpu_info { #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) -#define CPU_TO_PACKAGEINDEX(i) \ - ((physical_balance && i > cpu_sibling_map[i]) ? cpu_sibling_map[i] : i) +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) @@ -401,6 +400,7 @@ static void do_irq_balance(void) unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); unsigned long move_this_load = 0; int max_loaded = 0, min_loaded = 0; + int load; unsigned long useful_load_threshold = balanced_irq_interval + 10; int selected_irq; int tmp_loaded, first_attempt = 1; @@ -452,7 +452,7 @@ static void do_irq_balance(void) for (i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) continue; - if (physical_balance && i > cpu_sibling_map[i]) + if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (min_cpu_irq > CPU_IRQ(i)) { min_cpu_irq = CPU_IRQ(i); @@ -471,7 +471,7 @@ tryanothercpu: for (i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) continue; - if (physical_balance && i > cpu_sibling_map[i]) + if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (max_cpu_irq <= CPU_IRQ(i)) continue; @@ -551,9 +551,14 @@ tryanotherirq: * We seek the least loaded sibling by making the comparison * (A+B)/2 vs B */ - if (physical_balance && (CPU_IRQ(min_loaded) >> 1) > - CPU_IRQ(cpu_sibling_map[min_loaded])) - min_loaded = cpu_sibling_map[min_loaded]; + load = CPU_IRQ(min_loaded) >> 1; + for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { + if (load > CPU_IRQ(j)) { + /* This won't change cpu_sibling_map[min_loaded] */ + load = CPU_IRQ(j); + min_loaded = j; + } + } cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]); target_cpu_mask = cpumask_of_cpu(min_loaded); --- linux-2.6.5-rc1/arch/i386/kernel/irq.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/irq.c 2004-03-17 19:31:21.786390440 -0800 @@ -75,6 +75,14 @@ irq_desc_t irq_desc[NR_IRQS] __cacheline static void register_irq_proc (unsigned int irq); /* + * per-CPU IRQ handling stacks + */ +#ifdef CONFIG_4KSTACKS +union irq_ctx *hardirq_ctx[NR_CPUS]; +union irq_ctx *softirq_ctx[NR_CPUS]; +#endif + +/* * Special irq handlers. */ @@ -213,7 +221,7 @@ inline void synchronize_irq(unsigned int * waste of time and is not what some drivers would * prefer. */ -int handle_IRQ_event(unsigned int irq, +asmlinkage int handle_IRQ_event(unsigned int irq, struct pt_regs *regs, struct irqaction *action) { int status = 1; /* Force the "do bottom halves" bit */ @@ -436,7 +444,7 @@ asmlinkage unsigned int do_IRQ(struct pt __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE - 1)); - if (unlikely(esp < (sizeof(struct thread_info) + 1024))) { + if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { printk("do_IRQ: stack overflow: %ld\n", esp - sizeof(struct thread_info)); dump_stack(); @@ -484,11 +492,70 @@ asmlinkage unsigned int do_IRQ(struct pt * useful for irq hardware that does not mask cleanly in an * SMP environment. */ +#ifdef CONFIG_4KSTACKS + + for (;;) { + irqreturn_t action_ret; + u32 *isp; + union irq_ctx * curctx; + union irq_ctx * irqctx; + + curctx = (union irq_ctx *) current_thread_info(); + irqctx = hardirq_ctx[smp_processor_id()]; + + spin_unlock(&desc->lock); + + /* + * this is where we switch to the IRQ stack. However, if we are already using + * the IRQ stack (because we interrupted a hardirq handler) we can't do that + * and just have to keep using the current stack (which is the irq stack already + * after all) + */ + + if (curctx == irqctx) + action_ret = handle_IRQ_event(irq, ®s, action); + else { + /* build the stack frame on the IRQ stack */ + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + irqctx->tinfo.task = curctx->tinfo.task; + irqctx->tinfo.real_stack = curctx->tinfo.real_stack; + irqctx->tinfo.virtual_stack = curctx->tinfo.virtual_stack; + irqctx->tinfo.previous_esp = current_stack_pointer(); + + *--isp = (u32) action; + *--isp = (u32) ®s; + *--isp = (u32) irq; + + asm volatile( + " xchgl %%ebx,%%esp \n" + " call handle_IRQ_event \n" + " xchgl %%ebx,%%esp \n" + : "=a"(action_ret) + : "b"(isp) + : "memory", "cc", "edx", "ecx" + ); + + + } + spin_lock(&desc->lock); + if (!noirqdebug) + note_interrupt(irq, desc, action_ret); + if (curctx != irqctx) + irqctx->tinfo.task = NULL; + if (likely(!(desc->status & IRQ_PENDING))) + break; + desc->status &= ~IRQ_PENDING; + } + +#else + for (;;) { irqreturn_t action_ret; spin_unlock(&desc->lock); + action_ret = handle_IRQ_event(irq, ®s, action); + spin_lock(&desc->lock); if (!noirqdebug) note_interrupt(irq, desc, action_ret); @@ -496,6 +563,7 @@ asmlinkage unsigned int do_IRQ(struct pt break; desc->status &= ~IRQ_PENDING; } +#endif desc->status &= ~IRQ_INPROGRESS; out: @@ -508,6 +576,8 @@ out: irq_exit(); + kgdb_process_breakpoint(); + return 1; } @@ -1053,3 +1123,81 @@ void init_irq_proc (void) register_irq_proc(i); } + +#ifdef CONFIG_4KSTACKS +static char softirq_stack[NR_CPUS * THREAD_SIZE] __attribute__((__aligned__(THREAD_SIZE))); +static char hardirq_stack[NR_CPUS * THREAD_SIZE] __attribute__((__aligned__(THREAD_SIZE))); + +/* + * allocate per-cpu stacks for hardirq and for softirq processing + */ +void irq_ctx_init(int cpu) +{ + union irq_ctx *irqctx; + + if (hardirq_ctx[cpu]) + return; + + irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + + hardirq_ctx[cpu] = irqctx; + + irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + + softirq_ctx[cpu] = irqctx; + + printk("CPU %u irqstacks, hard=%p soft=%p\n", + cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); +} + +extern asmlinkage void __do_softirq(void); + +asmlinkage void do_softirq(void) +{ + unsigned long flags; + struct thread_info *curctx; + union irq_ctx *irqctx; + u32 *isp; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + if (local_softirq_pending()) { + curctx = current_thread_info(); + irqctx = softirq_ctx[smp_processor_id()]; + irqctx->tinfo.task = curctx->task; + irqctx->tinfo.real_stack = curctx->real_stack; + irqctx->tinfo.virtual_stack = curctx->virtual_stack; + irqctx->tinfo.previous_esp = current_stack_pointer(); + + /* build the stack frame on the softirq stack */ + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + + + asm volatile( + " xchgl %%ebx,%%esp \n" + " call __do_softirq \n" + " movl %%ebx,%%esp \n" + : "=b"(isp) + : "0"(isp) + : "memory", "cc", "edx", "ecx", "eax" + ); + } + + local_irq_restore(flags); +} + +EXPORT_SYMBOL(do_softirq); +#endif --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/arch/i386/kernel/kgdb_stub.c 2004-03-17 19:29:34.401715392 -0800 @@ -0,0 +1,2458 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (c) 2000 VERITAS Software Corporation. + * + */ +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * Updated by: David Grothe + * Updated by: Robert Walsh + * Updated by: wangdi + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Compatibility with 2.1.xx kernel by David Grothe + * + * Changes to allow auto initilization. All that is needed is that it + * be linked with the kernel and a break point (int 3) be executed. + * The header file defines BREAKPOINT to allow one to do + * this. It should also be possible, once the interrupt system is up, to + * call putDebugChar("+"). Once this is done, the remote debugger should + * get our attention by sending a ^C in a packet. George Anzinger + * + * Integrated into 2.2.5 kernel by Tigran Aivazian + * Added thread support, support for multiple processors, + * support for ia-32(x86) hardware debugging. + * Amit S. Kale ( akale@veritas.com ) + * + * Modified to support debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing an int 3. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? SNN (signal NN) + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $#. + * + * where + * :: + * :: < two hex digits computed as modulo 256 sum of > + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer. + * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ +#define KGDB_VERSION "<20030915.1651.33>" +#include +#include +#include /* for strcpy */ +#include +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include +#include +#include + +/************************************************************************ + * + * external low-level support routines + */ +typedef void (*Function) (void); /* pointer to a function */ + +/* Thread reference */ +typedef unsigned char threadref[8]; + +extern int tty_putDebugChar(int); /* write a single character */ +extern int tty_getDebugChar(void); /* read and return a single char */ +extern void tty_flushDebugChar(void); /* flush pending characters */ +extern int eth_putDebugChar(int); /* write a single character */ +extern int eth_getDebugChar(void); /* read and return a single char */ +extern void eth_flushDebugChar(void); /* flush pending characters */ + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 400 + +char *kgdb_version = KGDB_VERSION; + +/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ +int debug_regs = 0; /* set to non-zero to print registers */ + +/* filled in by an external module */ +char *gdb_module_offsets; + +static const char hexchars[] = "0123456789abcdef"; + +/* Number of bytes of registers. */ +#define NUMREGBYTES 64 +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + */ +enum regnames { _EAX, /* 0 */ + _ECX, /* 1 */ + _EDX, /* 2 */ + _EBX, /* 3 */ + _ESP, /* 4 */ + _EBP, /* 5 */ + _ESI, /* 6 */ + _EDI, /* 7 */ + _PC /* 8 also known as eip */ , + _PS /* 9 also known as eflags */ , + _CS, /* 10 */ + _SS, /* 11 */ + _DS, /* 12 */ + _ES, /* 13 */ + _FS, /* 14 */ + _GS /* 15 */ +}; + +/*************************** ASSEMBLY CODE MACROS *************************/ +/* + * Put the error code here just in case the user cares. + * Likewise, the vector number here (since GDB only gets the signal + * number through the usual means, and that's not very specific). + * The called_from is the return address so he can tell how we entered kgdb. + * This will allow him to seperate out the various possible entries. + */ +#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ + +#define PID_MAX PID_MAX_DEFAULT + +#ifdef CONFIG_SMP +void smp_send_nmi_allbutself(void); +#define IF_SMP(x) x +#undef MAX_NO_CPUS +#ifndef CONFIG_NO_KGDB_CPUS +#define CONFIG_NO_KGDB_CPUS 2 +#endif +#if CONFIG_NO_KGDB_CPUS > NR_CPUS +#define MAX_NO_CPUS NR_CPUS +#else +#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS +#endif +#define hold_init hold_on_sstep: 1, +#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) +#define NUM_CPUS num_online_cpus() +#else +#define IF_SMP(x) +#define hold_init +#undef MAX_NO_CPUS +#define MAX_NO_CPUS 1 +#define NUM_CPUS 1 +#endif +#define NOCPU (struct task_struct *)0xbad1fbad +/* *INDENT-OFF* */ +struct kgdb_info { + int used_malloc; + void *called_from; + long long entry_tsc; + int errcode; + int vector; + int print_debug_info; +#ifdef CONFIG_SMP + int hold_on_sstep; + struct { + volatile struct task_struct *task; + int pid; + int hold; + struct pt_regs *regs; + } cpus_waiting[MAX_NO_CPUS]; +#endif +} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; + +/* *INDENT-ON* */ + +#define used_m kgdb_info.used_malloc +/* + * This is little area we set aside to contain the stack we + * need to build to allow gdb to call functions. We use one + * per cpu to avoid locking issues. We will do all this work + * with interrupts off so that should take care of the protection + * issues. + */ +#define LOOKASIDE_SIZE 200 /* should be more than enough */ +#define MALLOC_MAX 200 /* Max malloc size */ +struct { + unsigned int esp; + int array[LOOKASIDE_SIZE]; +} fn_call_lookaside[MAX_NO_CPUS]; + +static int trap_cpu; +static unsigned int OLD_esp; + +#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] +#define IF_BIT 0x200 +#define TF_BIT 0x100 + +#define MALLOC_ROUND 8-1 + +static char malloc_array[MALLOC_MAX]; +IF_SMP(static void to_gdb(const char *mess)); +void * +malloc(int size) +{ + + if (size <= (MALLOC_MAX - used_m)) { + int old_used = used_m; + used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); + return &malloc_array[old_used]; + } else { + return NULL; + } +} + +/* + * I/O dispatch functions... + * Based upon kgdboe, either call the ethernet + * handler or the serial one.. + */ +void +putDebugChar(int c) +{ + if (!kgdboe) { + tty_putDebugChar(c); + } else { + eth_putDebugChar(c); + } +} + +int +getDebugChar(void) +{ + if (!kgdboe) { + return tty_getDebugChar(); + } else { + return eth_getDebugChar(); + } +} + +void +flushDebugChar(void) +{ + if (!kgdboe) { + tty_flushDebugChar(); + } else { + eth_flushDebugChar(); + } +} + +/* + * Gdb calls functions by pushing agruments, including a return address + * on the stack and the adjusting EIP to point to the function. The + * whole assumption in GDB is that we are on a different stack than the + * one the "user" i.e. code that hit the break point, is on. This, of + * course is not true in the kernel. Thus various dodges are needed to + * do the call without directly messing with EIP (which we can not change + * as it is just a location and not a register. To adjust it would then + * require that we move every thing below EIP up or down as needed. This + * will not work as we may well have stack relative pointer on the stack + * (such as the pointer to regs, for example). + + * So here is what we do: + * We detect gdb attempting to store into the stack area and instead, store + * into the fn_call_lookaside.array at the same relative location as if it + * were the area ESP pointed at. We also trap ESP modifications + * and uses these to adjust fn_call_lookaside.esp. On entry + * fn_call_lookaside.esp will be set to point at the last entry in + * fn_call_lookaside.array. This allows us to check if it has changed, and + * if so, on exit, we add the registers we will use to do the move and a + * trap/ interrupt return exit sequence. We then adjust the eflags in the + * regs array (remember we now have a copy in the fn_call_lookaside.array) to + * kill the interrupt bit, AND we change EIP to point at our set up stub. + * As part of the register set up we preset the registers to point at the + * begining and end of the fn_call_lookaside.array, so all the stub needs to + * do is move words from the array to the stack until ESP= the desired value + * then do the rti. This will then transfer to the desired function with + * all the correct registers. Nifty huh? + */ +extern asmlinkage void fn_call_stub(void); +extern asmlinkage void fn_rtn_stub(void); +/* *INDENT-OFF* */ +__asm__("fn_rtn_stub:\n\t" + "movl %eax,%esp\n\t" + "fn_call_stub:\n\t" + "1:\n\t" + "addl $-4,%ebx\n\t" + "movl (%ebx), %eax\n\t" + "pushl %eax\n\t" + "cmpl %esp,%ecx\n\t" + "jne 1b\n\t" + "popl %eax\n\t" + "popl %ebx\n\t" + "popl %ecx\n\t" + "iret \n\t"); +/* *INDENT-ON* */ +#define gdb_i386vector kgdb_info.vector +#define gdb_i386errcode kgdb_info.errcode +#define waiting_cpus kgdb_info.cpus_waiting +#define remote_debug kgdb_info.print_debug_info +#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold +/* gdb locks */ + +#ifdef CONFIG_SMP +static int in_kgdb_called; +static spinlock_t waitlocks[MAX_NO_CPUS] = + {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; +/* + * The following array has the thread pointer of each of the "other" + * cpus. We make it global so it can be seen by gdb. + */ +volatile int in_kgdb_entry_log[MAX_NO_CPUS]; +volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; +/* +static spinlock_t continuelocks[MAX_NO_CPUS]; +*/ +spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; +/* waiters on our spinlock plus us */ +static atomic_t spinlock_waiters = ATOMIC_INIT(1); +static int spinlock_count = 0; +static int spinlock_cpu = 0; +/* + * Note we use nested spin locks to account for the case where a break + * point is encountered when calling a function by user direction from + * kgdb. Also there is the memory exception recursion to account for. + * Well, yes, but this lets other cpus thru too. Lets add a + * cpu id to the lock. + */ +#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ + spinlock_cpu != smp_processor_id()){\ + atomic_inc(&spinlock_waiters); \ + while (! spin_trylock(x)) {\ + in_kgdb(®s);\ + }\ + atomic_dec(&spinlock_waiters); \ + spinlock_count = 1; \ + spinlock_cpu = smp_processor_id(); \ + }else{ \ + spinlock_count++; \ + } +#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) +#else +unsigned kgdb_spinlock = 0; +#define KGDB_SPIN_LOCK(x) --*x +#define KGDB_SPIN_UNLOCK(x) ++*x +#endif + +int +hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return (ch - 'a' + 10); + if ((ch >= '0') && (ch <= '9')) + return (ch - '0'); + if ((ch >= 'A') && (ch <= 'F')) + return (ch - 'A' + 10); + return (-1); +} + +/* scan for the sequence $# */ +void +getpacket(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + char ch; + + do { + /* wait around for the start character, ignore all other characters */ + while ((ch = (getDebugChar() & 0x7f)) != '$') ; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < BUFMAX) { + ch = getDebugChar() & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(getDebugChar() & 0x7f) << 4; + xmitcsum += hex(getDebugChar() & 0x7f); + if ((remote_debug) && (checksum != xmitcsum)) { + printk + ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", + checksum, xmitcsum, buffer); + } + + if (checksum != xmitcsum) + putDebugChar('-'); /* failed checksum */ + else { + putDebugChar('+'); /* successful transfer */ + /* if a sequence char is present, reply the sequence ID */ + if (buffer[2] == ':') { + putDebugChar(buffer[0]); + putDebugChar(buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(buffer); + for (i = 3; i <= count; i++) + buffer[i - 3] = buffer[i]; + } + } + } + } while (checksum != xmitcsum); + + if (remote_debug) + printk("R:%s\n", buffer); + flushDebugChar(); +} + +/* send the packet in buffer. */ + +void +putpacket(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + /* $#. */ + + if (!kgdboe) { + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + putDebugChar(ch); + checksum += ch; + count += 1; + } + + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + + } while ((getDebugChar() & 0x7f) != '+'); + } else { + /* + * For udp, we can not transfer too much bytes once. + * We only transfer MAX_SEND_COUNT size bytes each time + */ + +#define MAX_SEND_COUNT 30 + + int send_count = 0, i = 0; + char send_buf[MAX_SEND_COUNT]; + + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + send_count = 0; + while ((ch = buffer[count])) { + if (send_count >= MAX_SEND_COUNT) { + for(i = 0; i < MAX_SEND_COUNT; i++) { + putDebugChar(send_buf[i]); + } + flushDebugChar(); + send_count = 0; + } else { + send_buf[send_count] = ch; + checksum += ch; + count ++; + send_count++; + } + } + for(i = 0; i < send_count; i++) + putDebugChar(send_buf[i]); + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + } while ((getDebugChar() & 0x7f) != '+'); + } +} + +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; +static short error; + +void +debug_error(char *format, char *parm) +{ + if (remote_debug) + printk(format, parm); +} + +static void +print_regs(struct pt_regs *regs) +{ + printk("EAX=%08lx ", regs->eax); + printk("EBX=%08lx ", regs->ebx); + printk("ECX=%08lx ", regs->ecx); + printk("EDX=%08lx ", regs->edx); + printk("\n"); + printk("ESI=%08lx ", regs->esi); + printk("EDI=%08lx ", regs->edi); + printk("EBP=%08lx ", regs->ebp); + printk("ESP=%08lx ", (long) ®s->esp); + printk("\n"); + printk(" DS=%08x ", regs->xds); + printk(" ES=%08x ", regs->xes); + printk(" SS=%08x ", __KERNEL_DS); + printk(" FL=%08lx ", regs->eflags); + printk("\n"); + printk(" CS=%08x ", regs->xcs); + printk(" IP=%08lx ", regs->eip); +#if 0 + printk(" FS=%08x ", regs->fs); + printk(" GS=%08x ", regs->gs); +#endif + printk("\n"); + +} /* print_regs */ + +#define NEW_esp fn_call_lookaside[trap_cpu].esp + +static void +regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_EAX] = regs->eax; + gdb_regs[_EBX] = regs->ebx; + gdb_regs[_ECX] = regs->ecx; + gdb_regs[_EDX] = regs->edx; + gdb_regs[_ESI] = regs->esi; + gdb_regs[_EDI] = regs->edi; + gdb_regs[_EBP] = regs->ebp; + gdb_regs[_DS] = regs->xds; + gdb_regs[_ES] = regs->xes; + gdb_regs[_PS] = regs->eflags; + gdb_regs[_CS] = regs->xcs; + gdb_regs[_PC] = regs->eip; + /* Note, as we are a debugging the kernel, we will always + * trap in kernel code, this means no priviledge change, + * and so the pt_regs structure is not completely valid. In a non + * privilege change trap, only EFLAGS, CS and EIP are put on the stack, + * SS and ESP are not stacked, this means that the last 2 elements of + * pt_regs is not valid (they would normally refer to the user stack) + * also, using regs+1 is no good because you end up will a value that is + * 2 longs (8) too high. This used to cause stepping over functions + * to fail, so my fix is to use the address of regs->esp, which + * should point at the end of the stack frame. Note I have ignored + * completely exceptions that cause an error code to be stacked, such + * as double fault. Stuart Hughes, Zentropix. + * original code: gdb_regs[_ESP] = (int) (regs + 1) ; + + * this is now done on entry and moved to OLD_esp (as well as NEW_esp). + */ + gdb_regs[_ESP] = NEW_esp; + gdb_regs[_SS] = __KERNEL_DS; + gdb_regs[_FS] = 0xFFFF; + gdb_regs[_GS] = 0xFFFF; +} /* regs_to_gdb_regs */ + +static void +gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) +{ + regs->eax = gdb_regs[_EAX]; + regs->ebx = gdb_regs[_EBX]; + regs->ecx = gdb_regs[_ECX]; + regs->edx = gdb_regs[_EDX]; + regs->esi = gdb_regs[_ESI]; + regs->edi = gdb_regs[_EDI]; + regs->ebp = gdb_regs[_EBP]; + regs->xds = gdb_regs[_DS]; + regs->xes = gdb_regs[_ES]; + regs->eflags = gdb_regs[_PS]; + regs->xcs = gdb_regs[_CS]; + regs->eip = gdb_regs[_PC]; + NEW_esp = gdb_regs[_ESP]; /* keep the value */ +#if 0 /* can't change these */ + regs->esp = gdb_regs[_ESP]; + regs->xss = gdb_regs[_SS]; + regs->fs = gdb_regs[_FS]; + regs->gs = gdb_regs[_GS]; +#endif + +} /* gdb_regs_to_regs */ +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched ((unsigned long) scheduling_functions_start_here) +#define last_sched ((unsigned long) scheduling_functions_end_here) + +int thread_list = 0; + +void +get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) +{ + unsigned long stack_page; + int count = 0; + IF_SMP(int i); + if (!p || p == current) { + regs_to_gdb_regs(gdb_regs, regs); + return; + } +#ifdef CONFIG_SMP + for (i = 0; i < MAX_NO_CPUS; i++) { + if (p == kgdb_info.cpus_waiting[i].task) { + regs_to_gdb_regs(gdb_regs, + kgdb_info.cpus_waiting[i].regs); + gdb_regs[_ESP] = + (int) &kgdb_info.cpus_waiting[i].regs->esp; + + return; + } + } +#endif + memset(gdb_regs, 0, NUMREGBYTES); + gdb_regs[_ESP] = p->thread.esp; + gdb_regs[_PC] = p->thread.eip; + gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; + gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); + gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); + +/* + * This code is to give a more informative notion of where a process + * is waiting. It is used only when the user asks for a thread info + * list. If he then switches to the thread, s/he will find the task + * is in schedule, but a back trace should show the same info we come + * up with. This code was shamelessly purloined from process.c. It was + * then enhanced to provide more registers than simply the program + * counter. + */ + + if (!thread_list) { + return; + } + + if (p->state == TASK_RUNNING) + return; + stack_page = (unsigned long) p->thread_info; + if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > + THREAD_SIZE - sizeof(long) + stack_page) + return; + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + do { + if (gdb_regs[_EBP] < stack_page || + gdb_regs[_EBP] > THREAD_SIZE - 2*sizeof(long) + stack_page) + return; + gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); + gdb_regs[_ESP] = gdb_regs[_EBP] + 8; + gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; + if (gdb_regs[_PC] < first_sched || gdb_regs[_PC] >= last_sched) + return; + } while (count++ < 16); + return; +} + +/* Indicate to caller of mem2hex or hex2mem that there has been an + error. */ +static volatile int mem_err = 0; +static volatile int mem_err_expected = 0; +static volatile int mem_err_cnt = 0; +static int garbage_loc = -1; + +int +get_char(char *addr) +{ + return *addr; +} + +void +set_char(char *addr, int val, int may_fault) +{ + /* + * This code traps references to the area mapped to the kernel + * stack as given by the regs and, instead, stores to the + * fn_call_lookaside[cpu].array + */ + if (may_fault && + (unsigned int) addr < OLD_esp && + ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { + addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); + } + *addr = val; +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* return a pointer to the last char put in buf (null) */ +/* If MAY_FAULT is non-zero, then we should set mem_err in response to + a fault; if zero treat a fault like any other fault in the stub. */ +char * +mem2hex(char *mem, char *buf, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + /* printk("%lx = ", mem) ; */ + + ch = get_char(mem++); + + /* printk("%02x\n", ch & 0xFF) ; */ + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault fetching from addr %lx\n", + (long) (mem - 1)); + *buf = 0; /* truncate buffer */ + return (buf); + } + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + if (may_fault) + mem_err_expected = 0; + return (buf); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return a pointer to the character AFTER the last byte written */ +/* NOTE: We use the may fault flag to also indicate if the write is to + * the registers (0) or "other" memory (!=0) + */ +char * +hex2mem(char *buf, char *mem, int count, int may_fault) +{ + int i; + unsigned char ch; + + if (may_fault) { + mem_err_expected = 1; + mem_err = 0; + } + for (i = 0; i < count; i++) { + ch = hex(*buf++) << 4; + ch = ch + hex(*buf++); + set_char(mem++, ch, may_fault); + + if (may_fault && mem_err) { + if (remote_debug) + printk("Mem fault storing to addr %lx\n", + (long) (mem - 1)); + return (mem); + } + } + if (may_fault) + mem_err_expected = 0; + return (mem); +} + +/**********************************************/ +/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ +/* RETURN NUMBER OF CHARS PROCESSED */ +/**********************************************/ +int +hexToInt(char **ptr, int *intValue) +{ + int numChars = 0; + int hexValue; + + *intValue = 0; + + while (**ptr) { + hexValue = hex(**ptr); + if (hexValue >= 0) { + *intValue = (*intValue << 4) | hexValue; + numChars++; + } else + break; + + (*ptr)++; + } + + return (numChars); +} + +#define stubhex(h) hex(h) +#ifdef old_thread_list + +static int +stub_unpack_int(char *buff, int fieldlength) +{ + int nibble; + int retval = 0; + + while (fieldlength) { + nibble = stubhex(*buff++); + retval |= nibble; + fieldlength--; + if (fieldlength) + retval = retval << 4; + } + return retval; +} +#endif +static char * +pack_hex_byte(char *pkt, int byte) +{ + *pkt++ = hexchars[(byte >> 4) & 0xf]; + *pkt++ = hexchars[(byte & 0xf)]; + return pkt; +} + +#define BUF_THREAD_ID_SIZE 16 + +static char * +pack_threadid(char *pkt, threadref * id) +{ + char *limit; + unsigned char *altid; + + altid = (unsigned char *) id; + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *altid++); + return pkt; +} + +#ifdef old_thread_list +static char * +unpack_byte(char *buf, int *value) +{ + *value = stub_unpack_int(buf, 2); + return buf + 2; +} + +static char * +unpack_threadid(char *inbuf, threadref * id) +{ + char *altref; + char *limit = inbuf + BUF_THREAD_ID_SIZE; + int x, y; + + altref = (char *) id; + + while (inbuf < limit) { + x = stubhex(*inbuf++); + y = stubhex(*inbuf++); + *altref++ = (x << 4) | y; + } + return inbuf; +} +#endif +void +int_to_threadref(threadref * id, int value) +{ + unsigned char *scan; + + scan = (unsigned char *) id; + { + int i = 4; + while (i--) + *scan++ = 0; + } + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} +int +int_to_hex_v(unsigned char * id, int value) +{ + unsigned char *start = id; + int shift; + int ch; + + for (shift = 28; shift >= 0; shift -= 4) { + if ((ch = (value >> shift) & 0xf) || (id != start)) { + *id = hexchars[ch]; + id++; + } + } + if (id == start) + *id++ = '0'; + return id - start; +} +#ifdef old_thread_list + +static int +threadref_to_int(threadref * ref) +{ + int i, value = 0; + unsigned char *scan; + + scan = (char *) ref; + scan += 4; + i = 4; + while (i-- > 0) + value = (value << 8) | ((*scan++) & 0xff); + return value; +} +#endif +static int +cmp_str(char *s1, char *s2, int count) +{ + while (count--) { + if (*s1++ != *s2++) + return 0; + } + return 1; +} + +#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ +extern struct task_struct *kgdb_get_idle(int cpu); +#define idle_task(cpu) kgdb_get_idle(cpu) +#else +#define idle_task(cpu) init_tasks[cpu] +#endif + +extern int kgdb_pid_init_done; + +struct task_struct * +getthread(int pid) +{ + struct task_struct *thread; + if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { + + return idle_task(pid - PID_MAX); + } else { + /* + * find_task_by_pid is relatively safe all the time + * Other pid functions require lock downs which imply + * that we may be interrupting them (as we get here + * in the middle of most any lock down). + * Still we don't want to call until the table exists! + */ + if (kgdb_pid_init_done){ + thread = find_task_by_pid(pid); + if (thread) { + return thread; + } + } + } + return NULL; +} +/* *INDENT-OFF* */ +struct hw_breakpoint { + unsigned enabled; + unsigned type; + unsigned len; + unsigned addr; +} breakinfo[4] = { {enabled:0}, + {enabled:0}, + {enabled:0}, + {enabled:0}}; +/* *INDENT-ON* */ +unsigned hw_breakpoint_status; +void +correct_hw_break(void) +{ + int breakno; + int correctit; + int breakbit; + unsigned dr7; + + asm volatile ("movl %%db7, %0\n":"=r" (dr7) + :); + /* *INDENT-OFF* */ + do { + unsigned addr0, addr1, addr2, addr3; + asm volatile ("movl %%db0, %0\n" + "movl %%db1, %1\n" + "movl %%db2, %2\n" + "movl %%db3, %3\n" + :"=r" (addr0), "=r"(addr1), + "=r"(addr2), "=r"(addr3) + :); + } while (0); + /* *INDENT-ON* */ + correctit = 0; + for (breakno = 0; breakno < 3; breakno++) { + breakbit = 2 << (breakno << 1); + if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { + correctit = 1; + dr7 |= breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + dr7 |= (((breakinfo[breakno].len << 2) | + breakinfo[breakno].type) << 16) << + (breakno << 2); + switch (breakno) { + case 0: + asm volatile ("movl %0, %%dr0\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 1: + asm volatile ("movl %0, %%dr1\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 2: + asm volatile ("movl %0, %%dr2\n"::"r" + (breakinfo[breakno].addr)); + break; + + case 3: + asm volatile ("movl %0, %%dr3\n"::"r" + (breakinfo[breakno].addr)); + break; + } + } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { + correctit = 1; + dr7 &= ~breakbit; + dr7 &= ~(0xf0000 << (breakno << 2)); + } + } + if (correctit) { + asm volatile ("movl %0, %%db7\n"::"r" (dr7)); + } +} + +int +remove_hw_break(unsigned breakno) +{ + if (!breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 0; + return 0; +} + +int +set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr) +{ + if (breakinfo[breakno].enabled) { + return -1; + } + breakinfo[breakno].enabled = 1; + breakinfo[breakno].type = type; + breakinfo[breakno].len = len; + breakinfo[breakno].addr = addr; + return 0; +} + +#ifdef CONFIG_SMP +static int in_kgdb_console = 0; + +int +in_kgdb(struct pt_regs *regs) +{ + unsigned flags; + int cpu = smp_processor_id(); + in_kgdb_called = 1; + if (!spin_is_locked(&kgdb_spinlock)) { + if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ + in_kgdb_console) { /* or we are doing slow i/o */ + return 1; + } + return 0; + } + + /* As I see it the only reason not to let all cpus spin on + * the same spin_lock is to allow selected ones to proceed. + * This would be a good thing, so we leave it this way. + * Maybe someday.... Done ! + + * in_kgdb() is called from an NMI so we don't pretend + * to have any resources, like printk() for example. + */ + + kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ + /* + * log arival of this cpu + * The NMI keeps on ticking. Protect against recurring more + * than once, and ignor the cpu that has the kgdb lock + */ + in_kgdb_entry_log[cpu]++; + in_kgdb_here_log[cpu] = regs; + if (cpu == spinlock_cpu || waiting_cpus[cpu].task) + goto exit_in_kgdb; + + /* + * For protection of the initilization of the spin locks by kgdb + * it locks the kgdb spinlock before it gets the wait locks set + * up. We wait here for the wait lock to be taken. If the + * kgdb lock goes away first?? Well, it could be a slow exit + * sequence where the wait lock is removed prior to the kgdb lock + * so if kgdb gets unlocked, we just exit. + */ + + while (spin_is_locked(&kgdb_spinlock) && + !spin_is_locked(waitlocks + cpu)) ; + if (!spin_is_locked(&kgdb_spinlock)) + goto exit_in_kgdb; + + waiting_cpus[cpu].task = current; + waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu); + waiting_cpus[cpu].regs = regs; + + spin_unlock_wait(waitlocks + cpu); + + /* + * log departure of this cpu + */ + waiting_cpus[cpu].task = 0; + waiting_cpus[cpu].pid = 0; + waiting_cpus[cpu].regs = 0; + correct_hw_break(); + exit_in_kgdb: + in_kgdb_here_log[cpu] = 0; + kgdb_local_irq_restore(flags); + return 1; + /* + spin_unlock(continuelocks + smp_processor_id()); + */ +} + +void +smp__in_kgdb(struct pt_regs regs) +{ + ack_APIC_irq(); + in_kgdb(®s); +} +#else +int +in_kgdb(struct pt_regs *regs) +{ + return (kgdb_spinlock); +} +#endif + +void +printexceptioninfo(int exceptionNo, int errorcode, char *buffer) +{ + unsigned dr6; + int i; + switch (exceptionNo) { + case 1: /* debug exception */ + break; + case 3: /* breakpoint */ + sprintf(buffer, "Software breakpoint"); + return; + default: + sprintf(buffer, "Details not available"); + return; + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (dr6 & 0x4000) { + sprintf(buffer, "Single step"); + return; + } + for (i = 0; i < 4; ++i) { + if (dr6 & (1 << i)) { + sprintf(buffer, "Hardware breakpoint %d", i); + return; + } + } + sprintf(buffer, "Unknown trap"); + return; +} + +/* + * This function does all command procesing for interfacing to gdb. + * + * NOTE: The INT nn instruction leaves the state of the interrupt + * enable flag UNCHANGED. That means that when this routine + * is entered via a breakpoint (INT 3) instruction from code + * that has interrupts enabled, then interrupts will STILL BE + * enabled when this routine is entered. The first thing that + * we do here is disable interrupts so as to prevent recursive + * entries and bothersome serial interrupts while we are + * trying to run the serial port in polled mode. + * + * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so + * it is always necessary to do a restore_flags before returning + * so as to let go of that lock. + */ +int +kgdb_handle_exception(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs) +{ + struct task_struct *usethread = NULL; + struct task_struct *thread_list_start = 0, *thread = NULL; + int addr, length; + int breakno, breaktype; + char *ptr; + int newPC; + threadref thref; + int threadid; + int thread_min = PID_MAX + MAX_NO_CPUS; +#ifdef old_thread_list + int maxthreads; +#endif + int nothreads; + unsigned long flags; + int gdb_regs[NUMREGBYTES / 4]; + int dr6; + IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ +#define NO_NMI 1 +#define NO_SYNC 2 +#define regs (*linux_regs) +#define NUMREGS NUMREGBYTES/4 + /* + * If the entry is not from the kernel then return to the Linux + * trap handler and let it process the interrupt normally. + */ + if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { + printk("ignoring non-kernel exception\n"); + print_regs(®s); + return (0); + } + /* + * If we're using eth mode, set the 'mode' in the netdevice. + */ + + if (kgdboe) + netpoll_set_trap(1); + + kgdb_local_irq_save(flags); + + /* Get kgdb spinlock */ + + KGDB_SPIN_LOCK(&kgdb_spinlock); + rdtscll(kgdb_info.entry_tsc); + /* + * We depend on this spinlock and the NMI watch dog to control the + * other cpus. They will arrive at "in_kgdb()" as a result of the + * NMI and will wait there for the following spin locks to be + * released. + */ +#ifdef CONFIG_SMP + +#if 0 + if (cpu_callout_map & ~MAX_CPU_MASK) { + printk("kgdb : too many cpus, possibly not mapped" + " in contiguous space, change MAX_NO_CPUS" + " in kgdb_stub and make new kernel.\n" + " cpu_callout_map is %lx\n", cpu_callout_map); + goto exit_just_unlock; + } +#endif + if (spinlock_count == 1) { + int time = 0, end_time, dum = 0; + int i; + int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) + }; + if (remote_debug) { + printk("kgdb : cpu %d entry, syncing others\n", + smp_processor_id()); + } + for (i = 0; i < MAX_NO_CPUS; i++) { + /* + * Use trylock as we may already hold the lock if + * we are holding the cpu. Net result is all + * locked. + */ + spin_trylock(&waitlocks[i]); + } + for (i = 0; i < MAX_NO_CPUS; i++) + cpu_logged_in[i] = 0; + /* + * Wait for their arrival. We know the watch dog is active if + * in_kgdb() has ever been called, as it is always called on a + * watchdog tick. + */ + rdtsc(dum, time); + end_time = time + 2; /* Note: we use the High order bits! */ + i = 1; + if (num_online_cpus() > 1) { + int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; + smp_send_nmi_allbutself(); + + while (i < num_online_cpus() && time != end_time) { + int j; + for (j = 0; j < MAX_NO_CPUS; j++) { + if (waiting_cpus[j].task && + waiting_cpus[j].task != NOCPU && + !cpu_logged_in[j]) { + i++; + cpu_logged_in[j] = 1; + if (remote_debug) { + printk + ("kgdb : cpu %d arrived at kgdb\n", + j); + } + break; + } else if (!waiting_cpus[j].task && + !cpu_online(j)) { + waiting_cpus[j].task = NOCPU; + cpu_logged_in[j] = 1; + waiting_cpus[j].hold = 1; + break; + } + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + + int wait = 100000; + while (wait--) ; + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + printk + ("kgdb : cpu %d stall" + " in in_kgdb\n", + j); + i++; + cpu_logged_in[j] = 1; + waiting_cpus[j].task = + (struct task_struct + *) 1; + } + } + } + + if (in_kgdb_entry_log[smp_processor_id()] > + (me_in_kgdb + 10)) { + break; + } + + rdtsc(dum, time); + } + if (i < num_online_cpus()) { + printk + ("kgdb : time out, proceeding without sync\n"); +#if 0 + printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", + waiting_cpus[0].task != 0, + waiting_cpus[1].task != 0); + printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", + cpu_logged_in[0], cpu_logged_in[1]); + printk + ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", + in_kgdb_here_log[0] != 0, + in_kgdb_here_log[1] != 0); +#endif + entry_state = NO_SYNC; + } else { +#if 0 + int ent = + in_kgdb_entry_log[smp_processor_id()] - + me_in_kgdb; + printk("kgdb : sync after %d entries\n", ent); +#endif + } + } else { + if (remote_debug) { + printk + ("kgdb : %d cpus, but watchdog not active\n" + "proceeding without locking down other cpus\n", + num_online_cpus()); + entry_state = NO_NMI; + } + } + } +#endif + + if (remote_debug) { + unsigned long *lp = (unsigned long *) &linux_regs; + + printk("handle_exception(exceptionVector=%d, " + "signo=%d, err_code=%d, linux_regs=%p)\n", + exceptionVector, signo, err_code, linux_regs); + if (debug_regs) { + print_regs(®s); + printk("Stk: %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[0], lp[1], lp[2], lp[3], + lp[4], lp[5], lp[6], lp[7]); + printk(" %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[8], lp[9], lp[10], lp[11], + lp[12], lp[13], lp[14], lp[15]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[16], lp[17], lp[18], lp[19], + lp[20], lp[21], lp[22], lp[23]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[24], lp[25], lp[26], lp[27], + lp[28], lp[29], lp[30], lp[31]); + } + } + + /* Disable hardware debugging while we are in kgdb */ + /* Get the debug register status register */ +/* *INDENT-OFF* */ + __asm__("movl %0,%%db7" + : /* no output */ + :"r"(0)); + + asm volatile ("movl %%db6, %0\n" + :"=r" (hw_breakpoint_status) + :); + +/* *INDENT-ON* */ + switch (exceptionVector) { + case 0: /* divide error */ + case 1: /* debug exception */ + case 2: /* NMI */ + case 3: /* breakpoint */ + case 4: /* overflow */ + case 5: /* bounds check */ + case 6: /* invalid opcode */ + case 7: /* device not available */ + case 8: /* double fault (errcode) */ + case 10: /* invalid TSS (errcode) */ + case 12: /* stack fault (errcode) */ + case 16: /* floating point error */ + case 17: /* alignment check (errcode) */ + default: /* any undocumented */ + break; + case 11: /* segment not present (errcode) */ + case 13: /* general protection (errcode) */ + case 14: /* page fault (special errcode) */ + case 19: /* cache flush denied */ + if (mem_err_expected) { + /* + * This fault occured because of the + * get_char or set_char routines. These + * two routines use either eax of edx to + * indirectly reference the location in + * memory that they are working with. + * For a page fault, when we return the + * instruction will be retried, so we + * have to make sure that these + * registers point to valid memory. + */ + mem_err = 1; /* set mem error flag */ + mem_err_expected = 0; + mem_err_cnt++; /* helps in debugging */ + /* make valid address */ + regs.eax = (long) &garbage_loc; + /* make valid address */ + regs.edx = (long) &garbage_loc; + if (remote_debug) + printk("Return after memory error: " + "mem_err_cnt=%d\n", mem_err_cnt); + if (debug_regs) + print_regs(®s); + goto exit_kgdb; + } + break; + } + if (remote_debug) + printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); + + gdb_i386vector = exceptionVector; + gdb_i386errcode = err_code; + kgdb_info.called_from = __builtin_return_address(0); +#ifdef CONFIG_SMP + /* + * OK, we can now communicate, lets tell gdb about the sync. + * but only if we had a problem. + */ + switch (entry_state) { + case NO_NMI: + to_gdb("NMI not active, other cpus not stopped\n"); + break; + case NO_SYNC: + to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); + default:; + } + +#endif +/* + * Set up the gdb function call area. + */ + trap_cpu = smp_processor_id(); + OLD_esp = NEW_esp = (int) (&linux_regs->esp); + + IF_SMP(once_again:) + /* reply to host that an exception has occurred */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + + putpacket(remcomOutBuffer); + + while (1 == 1) { + error = 0; + remcomOutBuffer[0] = 0; + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + break; + case 'd': + remote_debug = !(remote_debug); /* toggle debug flag */ + printk("Remote debug %s\n", + remote_debug ? "on" : "off"); + break; + case 'g': /* return the value of the CPU registers */ + get_gdb_regs(usethread, ®s, gdb_regs); + mem2hex((char *) gdb_regs, + remcomOutBuffer, NUMREGBYTES, 0); + break; + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem(&remcomInBuffer[1], + (char *) gdb_regs, NUMREGBYTES, 0); + if (!usethread || usethread == current) { + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E00"); + } + break; + + case 'P':{ /* set the value of a single CPU register - + return OK */ + /* + * For some reason, gdb wants to talk about psudo + * registers (greater than 15). These may have + * meaning for ptrace, but for us it is safe to + * ignor them. We do this by dumping them into + * _GS which we also ignor, but do have memory for. + */ + int regno; + + ptr = &remcomInBuffer[1]; + regs_to_gdb_regs(gdb_regs, ®s); + if ((!usethread || usethread == current) && + hexToInt(&ptr, ®no) && + *ptr++ == '=' && (regno >= 0)) { + regno = + (regno >= NUMREGS ? _GS : regno); + hex2mem(ptr, (char *) &gdb_regs[regno], + 4, 0); + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + break; + } + strcpy(remcomOutBuffer, "E01"); + break; + } + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr) && + (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { + ptr = 0; + /* + * hex doubles the byte count + */ + if (length > (BUFMAX / 2)) + length = BUFMAX / 2; + mem2hex((char *) addr, + remcomOutBuffer, length, 1); + if (mem_err) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } + } + + if (ptr) { + strcpy(remcomOutBuffer, "E01"); + debug_error + ("malformed read memory command: %s\n", + remcomInBuffer); + } + break; + + /* MAA..AA,LLLL: + Write LLLL bytes at address AA.AA return OK */ + case 'M': + /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr) && + (*(ptr++) == ',') && + (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { + hex2mem(ptr, (char *) addr, length, 1); + + if (mem_err) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } else { + strcpy(remcomOutBuffer, "OK"); + } + + ptr = 0; + } + if (ptr) { + strcpy(remcomOutBuffer, "E02"); + debug_error + ("malformed write memory command: %s\n", + remcomInBuffer); + } + break; + case 'S': + remcomInBuffer[0] = 's'; + case 'C': + /* Csig;AA..AA where ;AA..AA is optional + * continue with signal + * Since signals are meaning less to us, delete that + * part and then fall into the 'c' code. + */ + ptr = &remcomInBuffer[1]; + length = 2; + while (*ptr && *ptr != ';') { + length++; + ptr++; + } + if (*ptr) { + do { + ptr++; + *(ptr - length++) = *ptr; + } while (*ptr); + } else { + remcomInBuffer[1] = 0; + } + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + /* D detach, reply OK and then continue */ + case 'c': + case 's': + case 'D': + + /* try to read optional parameter, + pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) { + if (remote_debug) + printk("Changing EIP to 0x%x\n", addr); + + regs.eip = addr; + } + + newPC = regs.eip; + + /* clear the trace bit */ + regs.eflags &= 0xfffffeff; + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + regs.eflags |= 0x100; + + /* detach is a friendly version of continue. Note that + debugging is still enabled (e.g hit control C) + */ + if (remcomInBuffer[0] == 'D') { + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + } + + if (remote_debug) { + printk("Resuming execution\n"); + print_regs(®s); + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno) && + (breakinfo[breakno].type == 0)) { + /* Set restore flag */ + regs.eflags |= 0x10000; + break; + } + } + } + + if (kgdboe) + netpoll_set_trap(0); + + correct_hw_break(); + asm volatile ("movl %0, %%db6\n"::"r" (0)); + goto exit_kgdb; + + /* kill the program */ + case 'k': /* do nothing */ + break; + + /* query */ + case 'q': + nothreads = 0; + switch (remcomInBuffer[1]) { + case 'f': + threadid = 1; + thread_list = 2; + thread_list_start = (usethread ? : current); + case 's': + if (!cmp_str(&remcomInBuffer[2], + "ThreadInfo", 10)) + break; + + remcomOutBuffer[nothreads++] = 'm'; + for (; threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + nothreads += int_to_hex_v( + &remcomOutBuffer[ + nothreads], + threadid); + if (thread_min > threadid) + thread_min = threadid; + remcomOutBuffer[ + nothreads] = ','; + nothreads++; + if (nothreads > BUFMAX - 10) + break; + } + } + if (remcomOutBuffer[nothreads - 1] == 'm') { + remcomOutBuffer[nothreads - 1] = 'l'; + } else { + nothreads--; + } + remcomOutBuffer[nothreads] = 0; + break; + +#ifdef old_thread_list /* Old thread info request */ + case 'L': + /* List threads */ + thread_list = 2; + thread_list_start = (usethread ? : current); + unpack_byte(remcomInBuffer + 3, &maxthreads); + unpack_threadid(remcomInBuffer + 5, &thref); + do { + int buf_thread_limit = + (BUFMAX - 22) / BUF_THREAD_ID_SIZE; + if (maxthreads > buf_thread_limit) { + maxthreads = buf_thread_limit; + } + } while (0); + remcomOutBuffer[0] = 'q'; + remcomOutBuffer[1] = 'M'; + remcomOutBuffer[4] = '0'; + pack_threadid(remcomOutBuffer + 5, &thref); + + threadid = threadref_to_int(&thref); + for (nothreads = 0; + nothreads < maxthreads && + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(remcomOutBuffer + + 21 + + nothreads * 16, + &thref); + nothreads++; + if (thread_min > threadid) + thread_min = threadid; + } + } + + if (threadid == PID_MAX + MAX_NO_CPUS) { + remcomOutBuffer[4] = '1'; + } + pack_hex_byte(remcomOutBuffer + 2, nothreads); + remcomOutBuffer[21 + nothreads * 16] = '\0'; + break; +#endif + case 'C': + /* Current thread id */ + remcomOutBuffer[0] = 'Q'; + remcomOutBuffer[1] = 'C'; + threadid = current->pid; + if (!threadid) { + /* + * idle thread + */ + for (threadid = PID_MAX; + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + if (current == + idle_task(threadid - + PID_MAX)) + break; + } + } + int_to_threadref(&thref, threadid); + pack_threadid(remcomOutBuffer + 2, &thref); + remcomOutBuffer[18] = '\0'; + break; + + case 'E': + /* Print exception info */ + printexceptioninfo(exceptionVector, + err_code, remcomOutBuffer); + break; + case 'T':{ + char * nptr; + /* Thread extra info */ + if (!cmp_str(&remcomInBuffer[2], + "hreadExtraInfo,", 15)) { + break; + } + ptr = &remcomInBuffer[17]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + nptr = &thread->comm[0]; + length = 0; + ptr = &remcomOutBuffer[0]; + do { + length++; + ptr = pack_hex_byte(ptr, *nptr++); + } while (*nptr && length < 16); + /* + * would like that 16 to be the size of + * task_struct.comm but don't know the + * syntax.. + */ + *ptr = 0; + } + } + break; + + /* task related */ + case 'H': + switch (remcomInBuffer[1]) { + case 'g': + ptr = &remcomInBuffer[2]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (!thread) { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + break; + } + /* + * Just in case I forget what this is all about, + * the "thread info" command to gdb causes it + * to ask for a thread list. It then switches + * to each thread and asks for the registers. + * For this (and only this) usage, we want to + * fudge the registers of tasks not on the run + * list (i.e. waiting) to show the routine that + * called schedule. Also, gdb, is a minimalist + * in that if the current thread is the last + * it will not re-read the info when done. + * This means that in this case we must show + * the real registers. So here is how we do it: + * Each entry we keep track of the min + * thread in the list (the last that gdb will) + * get info for. We also keep track of the + * starting thread. + * "thread_list" is cleared when switching back + * to the min thread if it is was current, or + * if it was not current, thread_list is set + * to 1. When the switch to current comes, + * if thread_list is 1, clear it, else do + * nothing. + */ + usethread = thread; + if ((thread_list == 1) && + (thread == thread_list_start)) { + thread_list = 0; + } + if (thread_list && (threadid == thread_min)) { + if (thread == thread_list_start) { + thread_list = 0; + } else { + thread_list = 1; + } + } + /* follow through */ + case 'c': + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (thread) { + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + if (thread_min > threadid) + thread_min = threadid; + } else { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + } + break; + + case 'Y': /* set up a hardware breakpoint */ + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + ptr++; + hexToInt(&ptr, &breaktype); + ptr++; + hexToInt(&ptr, &length); + ptr++; + hexToInt(&ptr, &addr); + if (set_hw_break(breakno & 0x3, + breaktype & 0x3, + length & 0x3, addr) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + /* Remove hardware breakpoint */ + case 'y': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + if (remove_hw_break(breakno & 0x3) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + case 'r': /* reboot */ + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + /*to_gdb("Rebooting\n"); */ + /* triplefault no return from here */ + { + static long no_idt[2]; + __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); + BREAKPOINT; + } + + } /* switch */ + + /* reply to the request */ + putpacket(remcomOutBuffer); + } /* while(1==1) */ + /* + * reached by goto only. + */ + exit_kgdb: + /* + * Here is where we set up to trap a gdb function call. NEW_esp + * will be changed if we are trying to do this. We handle both + * adding and subtracting, thus allowing gdb to put grung on + * the stack which it removes later. + */ + if (NEW_esp != OLD_esp) { + int *ptr = END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) + ptr -= (OLD_esp - NEW_esp) / sizeof (int); + *--ptr = linux_regs->eflags; + *--ptr = linux_regs->xcs; + *--ptr = linux_regs->eip; + *--ptr = linux_regs->ecx; + *--ptr = linux_regs->ebx; + *--ptr = linux_regs->eax; + linux_regs->ecx = NEW_esp - (sizeof (int) * 6); + linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) { + linux_regs->eip = (unsigned int) fn_call_stub; + } else { + linux_regs->eip = (unsigned int) fn_rtn_stub; + linux_regs->eax = NEW_esp; + } + linux_regs->eflags &= ~(IF_BIT | TF_BIT); + } +#ifdef CONFIG_SMP + /* + * Release gdb wait locks + * Sanity check time. Must have at least one cpu to run. Also single + * step must not be done if the current cpu is on hold. + */ + if (spinlock_count == 1) { + int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; + int cpu_avail = 0; + int i; + + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!cpu_online(i)) + break; + if (!hold_cpu(i)) { + cpu_avail = 1; + } + } + /* + * Early in the bring up there will be NO cpus on line... + */ + if (!cpu_avail && !cpus_empty(cpu_online_map)) { + to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); + goto once_again; + } + if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { + to_gdb + ("Current cpu must be unblocked to single step\n"); + goto once_again; + } + if (!(ss_hold)) { + int i; + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!hold_cpu(i)) { + spin_unlock(&waitlocks[i]); + } + } + } else { + spin_unlock(&waitlocks[smp_processor_id()]); + } + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + /* + * If this cpu is on hold, this is where we + * do it. Note, the NMI will pull us out of here, + * but will return as the above lock is not held. + * We will stay here till another cpu releases the lock for us. + */ + spin_unlock_wait(waitlocks + smp_processor_id()); + kgdb_local_irq_restore(flags); + return (0); + } +#if 0 +exit_just_unlock: +#endif +#endif + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + kgdb_local_irq_restore(flags); + return (0); +} + +/* this function is used to set up exception handlers for tracing and + * breakpoints. + * This function is not needed as the above line does all that is needed. + * We leave it for backward compatitability... + */ +void +set_debug_traps(void) +{ + /* + * linux_debug_hook is defined in traps.c. We store a pointer + * to our own exception handler into it. + + * But really folks, every hear of labeled common, an old Fortran + * concept. Lots of folks can reference it and it is define if + * anyone does. Only one can initialize it at link time. We do + * this with the hook. See the statement above. No need for any + * executable code and it is ready as soon as the kernel is + * loaded. Very desirable in kernel debugging. + + linux_debug_hook = handle_exception ; + */ + + /* In case GDB is started before us, ack any packets (presumably + "$?#xx") sitting there. + putDebugChar ('+'); + + initialized = 1; + */ +} + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. */ +/* But really, just use the BREAKPOINT macro. We will handle the int stuff + */ + +#ifdef later +/* + * possibly we should not go thru the traps.c code at all? Someday. + */ +void +do_kgdb_int3(struct pt_regs *regs, long error_code) +{ + kgdb_handle_exception(3, 5, error_code, regs); + return; +} +#endif +#undef regs +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS +asmlinkage void +bad_sys_call_exit(int stuff) +{ + struct pt_regs *regs = (struct pt_regs *) &stuff; + printk("Sys call %d return with %x preempt_count\n", + (int) regs->orig_eax, preempt_count()); +} +#endif +#ifdef CONFIG_STACK_OVERFLOW_TEST +#include +asmlinkage void +stack_overflow(void) +{ +#ifdef BREAKPOINT + BREAKPOINT; +#else + printk("Kernel stack overflow, looping forever\n"); +#endif + while (1) { + } +} +#endif + +#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) +char gdbconbuf[BUFMAX]; + +static void +kgdb_gdb_message(const char *s, unsigned count) +{ + int i; + int wcount; + char *bufptr; + /* + * This takes care of NMI while spining out chars to gdb + */ + IF_SMP(in_kgdb_console = 1); + gdbconbuf[0] = 'O'; + bufptr = gdbconbuf + 1; + while (count > 0) { + if ((count << 1) > (BUFMAX - 2)) { + wcount = (BUFMAX - 2) >> 1; + } else { + wcount = count; + } + count -= wcount; + for (i = 0; i < wcount; i++) { + bufptr = pack_hex_byte(bufptr, s[i]); + } + *bufptr = '\0'; + s += wcount; + + putpacket(gdbconbuf); + + } + IF_SMP(in_kgdb_console = 0); +} +#endif +#ifdef CONFIG_SMP +static void +to_gdb(const char *s) +{ + int count = 0; + while (s[count] && (count++ < BUFMAX)) ; + kgdb_gdb_message(s, count); +} +#endif +#ifdef CONFIG_KGDB_CONSOLE +#include +#include +#include +#include +#include + +void +kgdb_console_write(struct console *co, const char *s, unsigned count) +{ + + if (gdb_i386vector == -1) { + /* + * We have not yet talked to gdb. What to do... + * lets break, on continue we can do the write. + * But first tell him whats up. Uh, well no can do, + * as this IS the console. Oh well... + * We do need to wait or the messages will be lost. + * Other option would be to tell the above code to + * ignore this breakpoint and do an auto return, + * but that might confuse gdb. Also this happens + * early enough in boot up that we don't have the traps + * set up yet, so... + */ + breakpoint(); + } + kgdb_gdb_message(s, count); +} + +/* + * ------------------------------------------------------------ + * Serial KGDB driver + * ------------------------------------------------------------ + */ + +static struct console kgdbcons = { + name:"kgdb", + write:kgdb_console_write, +#ifdef CONFIG_KGDB_USER_CONSOLE + device:kgdb_console_device, +#endif + flags:CON_PRINTBUFFER | CON_ENABLED, + index:-1, +}; + +/* + * The trick here is that this file gets linked before printk.o + * That means we get to peer at the console info in the command + * line before it does. If we are up, we register, otherwise, + * do nothing. By returning 0, we allow printk to look also. + */ +static int kgdb_console_enabled; + +int __init +kgdb_console_init(char *str) +{ + if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { + register_console(&kgdbcons); + kgdb_console_enabled = 1; + } + return 0; /* let others look at the string */ +} + +__setup("console=", kgdb_console_init); + +#ifdef CONFIG_KGDB_USER_CONSOLE +static kdev_t kgdb_console_device(struct console *c); +/* This stuff sort of works, but it knocks out telnet devices + * we are leaving it here in case we (or you) find time to figure it out + * better.. + */ + +/* + * We need a real char device as well for when the console is opened for user + * space activities. + */ + +static int +kgdb_consdev_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t +kgdb_consdev_write(struct file *file, const char *buf, + size_t count, loff_t * ppos) +{ + int size, ret = 0; + static char kbuf[128]; + static DECLARE_MUTEX(sem); + + /* We are not reentrant... */ + if (down_interruptible(&sem)) + return -ERESTARTSYS; + + while (count > 0) { + /* need to copy the data from user space */ + size = count; + if (size > sizeof (kbuf)) + size = sizeof (kbuf); + if (copy_from_user(kbuf, buf, size)) { + ret = -EFAULT; + break;; + } + kgdb_console_write(&kgdbcons, kbuf, size); + count -= size; + ret += size; + buf += size; + } + + up(&sem); + + return ret; +} + +struct file_operations kgdb_consdev_fops = { + open:kgdb_consdev_open, + write:kgdb_consdev_write +}; +static kdev_t +kgdb_console_device(struct console *c) +{ + return MKDEV(TTYAUX_MAJOR, 1); +} + +/* + * This routine gets called from the serial stub in the i386/lib + * This is so it is done late in bring up (just before the console open). + */ +void +kgdb_console_finit(void) +{ + if (kgdb_console_enabled) { + char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); + char *cp = cptr; + while (*cptr && *cptr != '(') + cptr++; + *cptr = 0; + unregister_chrdev(TTYAUX_MAJOR, cp); + register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); + } +} +#endif +#endif +#ifdef CONFIG_KGDB_TS +#include /* time stamp code */ +#include /* in_interrupt */ +#ifdef CONFIG_KGDB_TS_64 +#define DATA_POINTS 64 +#endif +#ifdef CONFIG_KGDB_TS_128 +#define DATA_POINTS 128 +#endif +#ifdef CONFIG_KGDB_TS_256 +#define DATA_POINTS 256 +#endif +#ifdef CONFIG_KGDB_TS_512 +#define DATA_POINTS 512 +#endif +#ifdef CONFIG_KGDB_TS_1024 +#define DATA_POINTS 1024 +#endif +#ifndef DATA_POINTS +#define DATA_POINTS 128 /* must be a power of two */ +#endif +#define INDEX_MASK (DATA_POINTS - 1) +#if (INDEX_MASK & DATA_POINTS) +#error "CONFIG_KGDB_TS_COUNT must be a power of 2" +#endif +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + int data0; + int data1; +}; +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + struct task_struct *t1; + struct task_struct *t2; +}; +struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; + +struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; +int kgdb_and_then_count; + +void +kgdb_tstamp(int line, char *source, int data0, int data1) +{ + static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; + int flags; + kgdb_local_irq_save(flags); + spin_lock(&ts_spin); + rdtscll(kgdb_and_then->at_time); +#ifdef CONFIG_SMP + kgdb_and_then->on_cpu = smp_processor_id(); +#endif + kgdb_and_then->task = current; + kgdb_and_then->from_ln = line; + kgdb_and_then->in_src = source; + kgdb_and_then->from = __builtin_return_address(0); + kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | + (preempt_count() << 8)); + kgdb_and_then->data0 = data0; + kgdb_and_then->data1 = data1; + kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; + spin_unlock(&ts_spin); + kgdb_local_irq_restore(flags); +#ifdef CONFIG_PREEMPT + +#endif + return; +} +#endif +typedef int gdb_debug_hook(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs); +gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ + +static int kgdb_need_breakpoint[NR_CPUS]; + +void kgdb_schedule_breakpoint(void) +{ + kgdb_need_breakpoint[smp_processor_id()] = 1; +} + +void kgdb_process_breakpoint(void) +{ + /* + * Handle a breakpoint queued from inside network driver code + * to avoid reentrancy issues + */ + if (kgdb_need_breakpoint[smp_processor_id()]) { + kgdb_need_breakpoint[smp_processor_id()] = 0; + BREAKPOINT; + } +} + --- linux-2.6.5-rc1/arch/i386/kernel/ldt.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/kernel/ldt.c 2004-03-17 19:31:17.674015616 -0800 @@ -2,7 +2,7 @@ * linux/kernel/ldt.c * * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar + * Copyright (C) 1999, 2003 Ingo Molnar */ #include @@ -18,6 +18,8 @@ #include #include #include +#include +#include #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ static void flush_ldt(void *null) @@ -29,34 +31,31 @@ static void flush_ldt(void *null) static int alloc_ldt(mm_context_t *pc, int mincount, int reload) { - void *oldldt; - void *newldt; - int oldsize; + int oldsize, newsize, i; if (mincount <= pc->size) return 0; + /* + * LDT got larger - reallocate if necessary. + */ oldsize = pc->size; mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - pc->ldt = newldt; - wmb(); + newsize = mincount*LDT_ENTRY_SIZE; + for (i = 0; i < newsize; i += PAGE_SIZE) { + int nr = i/PAGE_SIZE; + BUG_ON(i >= 64*1024); + if (!pc->ldt_pages[nr]) { + pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER); + if (!pc->ldt_pages[nr]) + return -ENOMEM; + clear_highpage(pc->ldt_pages[nr]); + } + } pc->size = mincount; - wmb(); - if (reload) { #ifdef CONFIG_SMP cpumask_t mask; + preempt_disable(); load_LDT(pc); mask = cpumask_of_cpu(smp_processor_id()); @@ -67,21 +66,20 @@ static int alloc_ldt(mm_context_t *pc, i load_LDT(pc); #endif } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } return 0; } static inline int copy_ldt(mm_context_t *new, mm_context_t *old) { - int err = alloc_ldt(new, old->size, 0); - if (err < 0) + int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE; + + err = alloc_ldt(new, size, 0); + if (err < 0) { + new->size = 0; return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); + } + for (i = 0; i < nr_pages; i++) + copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0); return 0; } @@ -96,6 +94,7 @@ int init_new_context(struct task_struct init_MUTEX(&mm->context.sem); mm->context.size = 0; + memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES); old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { down(&old_mm->context.sem); @@ -107,23 +106,21 @@ int init_new_context(struct task_struct /* * No need to lock the MM as we are the last user + * Do not touch the ldt register, we are already + * in the next thread. */ void destroy_context(struct mm_struct *mm) { - if (mm->context.size) { - if (mm == current->active_mm) - clear_LDT(); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } + int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; + + for (i = 0; i < nr_pages; i++) + __free_page(mm->context.ldt_pages[i]); + mm->context.size = 0; } static int read_ldt(void __user * ptr, unsigned long bytecount) { - int err; + int err, i; unsigned long size; struct mm_struct * mm = current->mm; @@ -138,8 +135,25 @@ static int read_ldt(void __user * ptr, u size = bytecount; err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; + /* + * This is necessary just in case we got here straight from a + * context-switch where the ptes were set but no tlb flush + * was done yet. We rather avoid doing a TLB flush in the + * context-switch path and do it here instead. + */ + __flush_tlb_global(); + + for (i = 0; i < size; i += PAGE_SIZE) { + int nr = i / PAGE_SIZE, bytes; + char *kaddr = kmap(mm->context.ldt_pages[nr]); + + bytes = size - i; + if (bytes > PAGE_SIZE) + bytes = PAGE_SIZE; + if (copy_to_user(ptr + i, kaddr, size - i)) + err = -EFAULT; + kunmap(mm->context.ldt_pages[nr]); + } up(&mm->context.sem); if (err < 0) return err; @@ -158,7 +172,7 @@ static int read_default_ldt(void __user err = 0; address = &default_ldt[0]; - size = 5*sizeof(struct desc_struct); + size = 5*LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -200,7 +214,15 @@ static int write_ldt(void __user * ptr, goto out_unlock; } - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); + /* + * No rescheduling allowed from this point to the install. + * + * We do a TLB flush for the same reason as in the read_ldt() path. + */ + preempt_disable(); + __flush_tlb_global(); + lp = (__u32 *) ((ldt_info.entry_number << 3) + + (char *) __kmap_atomic_vaddr(KM_LDT_PAGE0)); /* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { @@ -221,6 +243,7 @@ install: *lp = entry_1; *(lp+1) = entry_2; error = 0; + preempt_enable(); out_unlock: up(&mm->context.sem); @@ -248,3 +271,26 @@ asmlinkage int sys_modify_ldt(int func, } return ret; } + +/* + * load one particular LDT into the current CPU + */ +void load_LDT_nolock(mm_context_t *pc, int cpu) +{ + struct page **pages = pc->ldt_pages; + int count = pc->size; + int nr_pages, i; + + if (likely(!count)) { + pages = &default_ldt_page; + count = 5; + } + nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; + + for (i = 0; i < nr_pages; i++) { + __kunmap_atomic_type(KM_LDT_PAGE0 - i); + __kmap_atomic(pages[i], KM_LDT_PAGE0 - i); + } + set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count); + load_LDT_desc(); +} --- linux-2.6.5-rc1/arch/i386/kernel/Makefile 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/Makefile 2004-03-17 19:31:17.674015616 -0800 @@ -7,13 +7,14 @@ extra-y := head.o init_task.o vmlinux.ld obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - doublefault.o + doublefault.o entry_trampoline.o obj-y += cpu/ obj-y += timers/ obj-$(CONFIG_ACPI_BOOT) += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o +obj-$(CONFIG_KGDB) += kgdb_stub.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o @@ -25,7 +26,6 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o n obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o -obj-$(CONFIG_EDD) += edd.o obj-$(CONFIG_MODULES) += module.o obj-y += sysenter.o vsyscall.o obj-$(CONFIG_ACPI_SRAT) += srat.o --- linux-2.6.5-rc1/arch/i386/kernel/mpparse.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/mpparse.c 2004-03-17 19:44:38.082335056 -0800 @@ -37,6 +37,7 @@ /* Have we found an MP table */ int smp_found_config; +unsigned int __initdata maxcpus = NR_CPUS; /* * Various Linux-internal data structures created from the @@ -168,8 +169,14 @@ void __init MP_processor_info (struct mp } if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "NR_CPUS limit of %i reached. Cannot " - "boot CPU(apicid 0x%x).\n", NR_CPUS, m->mpc_apicid); + printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); + return; + } + + if (num_processors >= maxcpus) { + printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." + " Processor ignored.\n", maxcpus); return; } num_processors++; @@ -668,7 +675,7 @@ void __init get_smp_config (void) * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc((void *)mpf->mpf_physptr)) { + if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) { smp_found_config = 0; printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); --- linux-2.6.5-rc1/arch/i386/kernel/nmi.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/nmi.c 2004-03-17 19:30:24.698069184 -0800 @@ -31,9 +31,19 @@ #include #include +#ifdef CONFIG_KGDB +#include +#ifdef CONFIG_SMP +unsigned int nmi_watchdog = NMI_IO_APIC; +#else +unsigned int nmi_watchdog = NMI_LOCAL_APIC; +#endif +#else unsigned int nmi_watchdog = NMI_NONE; +#endif static unsigned int nmi_hz = HZ; -unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ +static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ +static unsigned int nmi_p4_cccr_val; extern void show_registers(struct pt_regs *regs); /* nmi_active: @@ -66,7 +76,8 @@ int nmi_active; #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) #define P4_ESCR_OS (1<<3) #define P4_ESCR_USR (1<<2) -#define P4_CCCR_OVF_PMI (1<<26) +#define P4_CCCR_OVF_PMI0 (1<<26) +#define P4_CCCR_OVF_PMI1 (1<<27) #define P4_CCCR_THRESHOLD(N) ((N)<<20) #define P4_CCCR_COMPLEMENT (1<<19) #define P4_CCCR_COMPARE (1<<18) @@ -79,7 +90,7 @@ int nmi_active; #define MSR_P4_IQ_COUNTER0 0x30C #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) #define P4_NMI_IQ_CCCR0 \ - (P4_CCCR_OVF_PMI|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ + (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) int __init check_nmi_watchdog (void) @@ -107,11 +118,6 @@ int __init check_nmi_watchdog (void) } printk("OK.\n"); - /* now that we know it works we can reduce NMI frequency to - something more reasonable; makes a difference in some configs */ - if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = 1; - return 0; } @@ -322,6 +328,11 @@ static int setup_p4_watchdog(void) return 0; nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; + nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; +#ifdef CONFIG_SMP + if (smp_num_siblings == 2) + nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; +#endif if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) clear_msr_range(0x3F1, 2); @@ -339,12 +350,14 @@ static int setup_p4_watchdog(void) Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); apic_write(APIC_LVTPC, APIC_DM_NMI); - wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0); + wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); return 1; } void setup_apic_nmi_watchdog (void) { + nmi_hz = 1; + switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) @@ -408,6 +421,9 @@ void touch_nmi_watchdog (void) for (i = 0; i < NR_CPUS; i++) alert_counter[i] = 0; } +#ifdef CONFIG_KGDB +int tune_watchdog = 5*HZ; +#endif void nmi_watchdog_tick (struct pt_regs * regs) { @@ -421,12 +437,24 @@ void nmi_watchdog_tick (struct pt_regs * sum = irq_stat[cpu].apic_timer_irqs; +#ifdef CONFIG_KGDB + if (! in_kgdb(regs) && last_irq_sums[cpu] == sum ) { + +#else if (last_irq_sums[cpu] == sum) { +#endif /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; +#ifdef CONFIG_KGDB + if (alert_counter[cpu] == tune_watchdog) { + kgdb_handle_exception(2, SIGPWR, 0, regs); + last_irq_sums[cpu] = sum; + alert_counter[cpu] = 0; + } +#endif if (alert_counter[cpu] == 5*nmi_hz) { spin_lock(&nmi_print_lock); /* @@ -455,7 +483,7 @@ void nmi_watchdog_tick (struct pt_regs * * - LVTPC is masked on interrupt and must be * unmasked by the LVTPC handler. */ - wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0); + wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); apic_write(APIC_LVTPC, APIC_DM_NMI); } wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); --- linux-2.6.5-rc1/arch/i386/kernel/process.c 2004-03-15 22:21:04.000000000 -0800 +++ 25/arch/i386/kernel/process.c 2004-03-17 19:31:26.663648984 -0800 @@ -46,6 +46,7 @@ #include #include #include +#include #ifdef CONFIG_MATH_EMULATION #include #endif @@ -303,6 +304,9 @@ void flush_thread(void) struct task_struct *tsk = current; memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); +#ifdef CONFIG_X86_HIGH_ENTRY + clear_thread_flag(TIF_DB7); +#endif memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. @@ -316,9 +320,8 @@ void release_thread(struct task_struct * if (dead_task->mm) { // temporary debugging check if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", + printk("WARNING: dead process %8s still has LDT? <%d>\n", dead_task->comm, - dead_task->mm->context.ldt, dead_task->mm->context.size); BUG(); } @@ -342,7 +345,7 @@ int copy_thread(int nr, unsigned long cl { struct pt_regs * childregs; struct task_struct *tsk; - int err; + int err, i; childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; struct_cpy(childregs, regs); @@ -353,7 +356,18 @@ int copy_thread(int nr, unsigned long cl p->thread.esp = (unsigned long) childregs; p->thread.esp0 = (unsigned long) (childregs+1); + /* + * get the two stack pages, for the virtual stack. + * + * IMPORTANT: this code relies on the fact that the task + * structure is an THREAD_SIZE aligned piece of physical memory. + */ + for (i = 0; i < ARRAY_SIZE(p->thread.stack_page); i++) + p->thread.stack_page[i] = + virt_to_page((unsigned long)p->thread_info + (i*PAGE_SIZE)); + p->thread.eip = (unsigned long) ret_from_fork; + p->thread_info->real_stack = p->thread_info; savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); @@ -505,10 +519,42 @@ struct task_struct fastcall * __switch_t __unlazy_fpu(prev_p); +#ifdef CONFIG_X86_HIGH_ENTRY +{ + int i; + /* + * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is + * needed because otherwise NMIs could interrupt the + * user-return code with a virtual stack and stale TLBs.) + */ + for (i = 0; i < ARRAY_SIZE(next->stack_page); i++) { + __kunmap_atomic_type(KM_VSTACK_TOP-i); + __kmap_atomic(next->stack_page[i], KM_VSTACK_TOP-i); + } + /* + * NOTE: here we rely on the task being the stack as well + */ + next_p->thread_info->virtual_stack = + (void *)__kmap_atomic_vaddr(KM_VSTACK_TOP); +} +#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) + /* + * If next was preempted on entry from userspace to kernel, + * and now it's on a different cpu, we need to adjust %esp. + * This assumes that entry.S does not copy %esp while on the + * virtual stack (with interrupts enabled): which is so, + * except within __SWITCH_KERNELSPACE itself. + */ + if (unlikely(next->esp >= TASK_SIZE)) { + next->esp &= THREAD_SIZE - 1; + next->esp |= (unsigned long) next_p->thread_info->virtual_stack; + } +#endif +#endif /* * Reload esp0, LDT and the page table pointer: */ - load_esp0(tss, next); + load_virtual_esp0(tss, next_p); /* * Load the per-thread Thread-Local Storage descriptor. --- linux-2.6.5-rc1/arch/i386/kernel/ptrace.c 2003-06-14 12:18:05.000000000 -0700 +++ 25/arch/i386/kernel/ptrace.c 2004-03-17 19:30:16.687287008 -0800 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -524,6 +525,14 @@ out: __attribute__((regparm(3))) void do_syscall_trace(struct pt_regs *regs, int entryexit) { + if (unlikely(current->audit_context)) { + if (!entryexit) + audit_syscall_entry(current, regs->orig_eax, + regs->ebx); + else + audit_syscall_exit(current); + } + if (!test_thread_flag(TIF_SYSCALL_TRACE)) return; if (!(current->ptrace & PT_PTRACED)) --- linux-2.6.5-rc1/arch/i386/kernel/reboot.c 2004-01-09 00:04:30.000000000 -0800 +++ 25/arch/i386/kernel/reboot.c 2004-03-17 19:31:17.676015312 -0800 @@ -155,12 +155,11 @@ void machine_real_restart(unsigned char CMOS_WRITE(0x00, 0x8f); spin_unlock_irqrestore(&rtc_lock, flags); - /* Remap the kernel at virtual address zero, as well as offset zero - from the kernel segment. This assumes the kernel segment starts at - virtual address PAGE_OFFSET. */ - - memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); + /* + * Remap the first 16 MB of RAM (which includes the kernel image) + * at virtual address zero: + */ + setup_identity_mappings(swapper_pg_dir, 0, LOW_MAPPINGS_SIZE); /* * Use `swapper_pg_dir' as our page directory. --- linux-2.6.5-rc1/arch/i386/kernel/setup.c 2004-03-15 22:21:04.000000000 -0800 +++ 25/arch/i386/kernel/setup.c 2004-03-17 19:31:04.905956656 -0800 @@ -38,10 +38,10 @@ #include #include #include +#include #include