Index: linux/Makefile diff -u linux/Makefile:1.1.1.12 linux/Makefile:1.1.3.7 --- linux/Makefile:1.1.1.12 Sat Apr 17 16:21:06 1999 +++ linux/Makefile Sat Apr 24 02:48:03 1999 @@ -86,7 +86,19 @@ # standard CFLAGS # -CFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer +CFLAGS = -Wall -Wstrict-prototypes -O2 +ifeq ($(CONFIG_KERNEL_DEBUGGING),y) + CFLAGS += -fno-omit-frame-pointer + # Profiling is a big overhead so only turn it on if the user really wants it. + ifeq ($(CONFIG_DEBUG_MCOUNT),y) + CFLAGS += -pg + endif + ifeq ($(CONFIG_XKDEBUG),y) + CFLAGS += -g + endif +else + CFLAGS += -fomit-frame-pointer +endif ifdef CONFIG_SMP CFLAGS += -D__SMP__ @@ -104,6 +116,9 @@ # CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o +ifeq ($(CONFIG_KERNEL_DEBUGGING),y) + CORE_FILES +=kernel/debug/debug.o +endif FILESYSTEMS =fs/filesystems.a NETWORKS =net/network.a DRIVERS =drivers/block/block.a \ @@ -239,6 +254,10 @@ scripts/split-include include/linux/autoconf.h include/config @ touch include/config/MARKER +debug: include/linux/version.h + $(MAKE) -C scripts ktrace + $(MAKE) -C scripts/memleak all + linuxsubdirs: $(patsubst %, _dir_%, $(SUBDIRS)) $(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/config/MARKER @@ -356,6 +375,8 @@ rm -f core `find modules/ -type f -print`; \ fi rm -f submenu* + $(MAKE) -C scripts clean + $(MAKE) -C scripts/memleak clean mrproper: clean archmrproper rm -f include/linux/autoconf.h include/linux/version.h @@ -396,7 +417,7 @@ scripts/mkdep `find $(FINDHPATH) -follow -name \*.h ! 
-name modversions.h -print` > .hdepend # set -e; for i in $(SUBDIRS); do $(MAKE) -C $$i fastdep ;done # let this be made through the fastdep rule in Rules.make - $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)" + $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS) scripts) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS) scripts" MODVERFILE := Index: linux/Documentation/00-INDEX diff -u linux/Documentation/00-INDEX:1.1.1.2 linux/Documentation/00-INDEX:1.1.3.2 --- linux/Documentation/00-INDEX:1.1.1.2 Thu Feb 4 15:39:53 1999 +++ linux/Documentation/00-INDEX Tue Feb 23 22:52:31 1999 @@ -29,6 +29,8 @@ - info on the kernel support for extra binary formats. cdrom/ - directory with information on the CD-ROM drivers that Linux has. +debugging.txt + - summary of the integrated kernel debugging patch devices.tex - TeX source listing of all the nodes in /dev/ with major minor #'s devices.txt @@ -61,6 +63,8 @@ - directory with info about the kernel build process kmod.txt - - info on the kernel module loader/unloader (kerneld replacement) +ktrace.txt + - tracing kernel procedure flow and timing. locks.txt - info on file locking implementations, flock() vs. fcntl(), etc. logo.gif @@ -137,6 +141,8 @@ - info on the Unicode character/font mapping used in Linux. watchdog.txt - how to auto-reboot Linux if it has "fallen and can't get up". ;-) +xkdebug.txt + - debugging a running kernel from the same machine xterm-linux.xpm - XPM image of penguin logo (see logo.txt) sitting on an xterm. Index: linux/Documentation/Configure.help diff -u linux/Documentation/Configure.help:1.1.1.13 linux/Documentation/Configure.help:1.1.3.10 --- linux/Documentation/Configure.help:1.1.1.13 Sat Apr 17 16:22:23 1999 +++ linux/Documentation/Configure.help Sat Apr 24 02:48:04 1999 @@ -9517,6 +9517,145 @@ differs slightly from OSS/Free, so PLEASE READ Documentation/sound/sonicvibes. +Kernel debugging support +CONFIG_KERNEL_DEBUGGING + Shows low level kernel tracing, debugging and general hacking tools. 
+ Mere mortals say N. + +Debug kernel stack overflows +CONFIG_DEBUG_KSTACK + If you see "kernel stack corruption. Aiee" messages, and a kernel + hacker told you to 'switch on kernel stack debugging', then this + is the right option =B-) + Do 'make clean' after changing this option! + For normal systems, this option adds noticeable overhead, so say N. + +Kernel Stack Meter +CONFIG_KSTACK_METER + With this option set the kernel will record the minimum stack size + left since the boot of the machine and which function was running + in that condition. This is useful to understand how large the kernel + stack needs to be. It also allows us to detect if there is some piece + of code that could be optimized to run without eating a lot of stack. + To see the current values run `cat /proc/sys/debug/kstack-meter' and + `echo -1 0 >/proc/sys/debug/kstack-meter' to reinitialize to default. + The first integer is the minimum stack size left. The second is the + function that was running in that condition. + For normal systems, this option adds noticeable overhead, so say N. + +Kernel stack overflow threshold +CONFIG_KSTACK_THRESHOLD + If the stack has fewer bytes than this left, assume you are headed for an + overflow. + +Detect software lockups +CONFIG_DEBUG_SOFTLOCKUP + If you see strange lockups and a kernel hacker told you to 'switch + on software lockup detection', then this is the right option =B-) + Do 'make clean' after changing this option! + For normal systems, this option adds noticeable overhead, so say N. + +Deadlock threshold +CONFIG_SOFTLOCKUP_THRESHOLD + The number of procedure calls a process can make without going + through schedule. Any process that does more calls than this number + is "looping". Alas it does not catch inline procedure calls. + +Enable kernel tracer +CONFIG_TRACE + For kernel hackers who want to know where the path of execution goes + and how much time the kernel spends in the various procedures. 
The + trace is stored in /proc/trace (say Y to "/proc filesystem support"!) + and in order to read it, you need the ktrace program, see + scripts/ktrace. For normal systems, this option adds noticeable + overhead, so say N. + +Size of trace buffer +CONFIG_TRACE_SIZE + The number of trace entries to store in the kernel. + +Trace timestamp +CONFIG_TRACE_TIMESTAMP + Attempts to store an accurate timestamp against each trace entry, + scripts/ktrace will calculate the interval between successive + entries. On processors where an accurate timestamp is not available, + the jiffie counter is used instead. Jiffies are almost useless + because most procedure calls run in less than one jiffie but it is + better than nothing. Recommended if want procedure times and your + cpu supports an accurate timestamp, however it adds 64 or 32 bits to + each trace entry. + +Truncated trace timestamp +CONFIG_TRACE_TRUNCTIME + If the full timestamp field is taking up too much room (64 bits per + entry on x86) and you are willing to risk wraparound of the + timestamp, say Y here. Only the last 32 bits of the timestamp will + be stored. Unless you are *really* short on storage, say N. + +Process ID for trace +CONFIG_TRACE_PID + If you want to know which process a trace table entry is for, say Y + here. Recommended but adds sizeof(pid_t) to each trace table entry. + +Cpu ID for tracer +CONFIG_TRACE_CPU + If you want to know which cpu a trace table entry is for, say Y here. + Only effective on SMP systems. Recommended but it adds sizeof(int) + to each trace table entry. + +Memleak, Kernel memory leak detection support +CONFIG_MEMLEAK + For kernel hackers who want to track down memory leaks in the + kernel, say Y here and look at scripts/memleak. Mere mortals say N. + +SMP-NMI Software Watchdog +CONFIG_NMI_WATCHDOG + This software watchdog only works on Intel-SMP compliant boxes. The + driver tweaks the IRQ-delivery mechanizm to route a periodic NMI + interrupt to all CPUs. 
If one of the CPUs is detected as 'locked up' + in the nmi-handler, then an oops is generated. This oops can then be + used to debug the lockup. + +SMP-NMI-Bug Workaround, Alternative Watchdog IRQ Source +CONFIG_NMI_WATCHDOG_IRQ + Some boards are buggy and cannot generate a periodic NMI interrupt. Use a + non-zero IRQ as an NMI source in this case, e.g. IRQ=1 (keyboard IRQ). + This has the drawback that in the case of a lockup you'll have to + generate some keyboard IRQs to see the oops. (type on the keyboard) + +Xkdebug, Kernel Debugging support +CONFIG_XKDEBUG + For kernel hackers who want to debug their running kernel from the + same computer, using xkdebug. Not to be confused with debugging a + kernel from another machine over a serial link. As you are expected + to be a kernel hacker to use this, the simple rule about learning + about this feature is "Use the source, Luke!" -- see kernel/debug. + Don't say Y unless you really know what this option does, live + debugging can easily hang your system. Note that xkdebug.o is always + built as a module. + +GCC profiling support +CONFIG_PROFILE_GCC + This option improves kernel profiling by using the gcc profiling feature. + With this option enabled the kernel will profile for real, not just once + each timer interrupt. Enabling this option will add a lot of overhead to + the kernel. If you want to run this kernel in production and you want + profiling, it is better to use the normal profiling, so say N + here. + +Print %eip to resolve symbols from locks +CONFIG_PRINT_EIP + This will allow the kernel to print on the console the %eip + address every time a kernel function is called. + This allows addresses to be resolved even after a complete machine + lockup. Say Y here if a kernel hacker tells you to do that. 
+ +Semapahore deadlock detector +CONFIG_SEMAPHORE_DEADLOCK + With this option enabled the first down() that will block for more than + 20 sec will generate an Oops that will allow you to know the code path + that deadlocked. + Are you using a crosscompiler CONFIG_CROSSCOMPILE Say Y here if you are compiling the kernel on a different Index: linux/Documentation/debugging.txt diff -u /dev/null linux/Documentation/debugging.txt:1.1.3.3 --- /dev/null Sat Apr 24 02:49:29 1999 +++ linux/Documentation/debugging.txt Fri Mar 19 01:22:59 1999 @@ -0,0 +1,70 @@ +Debugging the kernel for fun and profit. + +Assorted tools of varying usefulness exist to debug the kernel. By far +the best debugging tool is the human brain. As Linus has said :- + + ... + I'm afraid that I've seen too many people fix bugs + by looking at debugger output, and that almost + inevitably leads to fixing the symptoms rather than + the underlying problems. + ... + "Use the Source, Luke, use the Source. Be one with + the code.". Think of Luke Skywalker discarding the + automatic firing system when closing on the deathstar, + and firing the proton torpedo (or whatever) manually. + _Then_ do you have the right mindset for fixing kernel + bugs. + ... + +Having said that, sometimes reading the source is not enough. The +following tools exist in the ktrace-xkdebug patch :- + + Debug kernel stack overflows + Detect software lockups + Kernel tracer (show logic flow through procedures) + + Written by Ingo Molnar . Currently + maintained by Mike Galbraith . + + Set breakpoints and single step the current kernel (xkdebug). + + kdebug was written by David Hinds + then modified by John Heidemann and renamed to + xkdebug. Currently maintained by Keith Owens . + + Print-EIP on video ram + + Improved by Andrea Arcangeli. + + Kernel stack meter + Kernel real profiling + Semaphore deadlock detector + + Developed by Andrea Arcangeli. 
+ + +The merge of the above tools into a single patch set (ktrace-xkdebug) +and integration into the kernel by Keith Owens . +PGP 917/C817FEC9. +Fingerprint 2B 25 0A 31 02 AE CA F7 73 0C 28 69 4A 7B 65 27 + + +This patch adds assorted options underneath Kernel hacking. It also +adds commands OOPS, STACKFAULT and KERNEL_LOOP to the reboot syscall. +Note that these reboot commands are not controlled by config options, +applying the patch makes them available. Command OOPS lets root force +a kernel oops from user space, the other two commands are mainly used +for testing kernel recovery from stack overflow and never ending loops. +Trivial code to cause a kernel oops with this patch :- + +#include +#include +int main(void) +{ + return(reboot(0xfee1dead, 672274793, LINUX_REBOOT_CMD_OOPS)); +} + +Currently the IKD stuff is maintained by Andrea Arcangeli and is dowloadable at: + + ftp://e-mind.com/pub/linux/patch-ikd-arca/ Index: linux/Documentation/ktrace.txt diff -u /dev/null linux/Documentation/ktrace.txt:1.1.3.1 --- /dev/null Sat Apr 24 02:49:29 1999 +++ linux/Documentation/ktrace.txt Sun Jan 24 20:07:30 1999 @@ -0,0 +1,88 @@ +ktrace - Trace logic flow through the kernel with time stamps. + + +******* Please read debugging.txt first. ******* + + +LIMITATION: nanosecond accuracy timings on x86 CPUs works only if the + CPU has the rtdsc instruction. If you have another x86 + CPU, undef the HAVE_RTDSC define in include/asm/profiler.h. + See the 'tsc' flag in the /proc/cpuinfo flags field if + unsure. + + Alpha CPU support is not yet tested. + Intel SMP is tested + + +INSTALLATION + +If you are reading this, you have probably already applied the patch to +your kernel, now set the options and rebuild. Under Kernel Hacking, +say Y to Kernel debugging support then Y to Enable kernel tracing. +Make dep clean, recompile, install the new kernel and modules, reboot. + +Expect the new kernel to be somewhat slower than the unpatched kernel. 
+Check out /proc/trace, if it exists then you can go on to to the +user-space part: + +In /usr/src/linux, make debug. To get the current trace on a 166 MHz +CPU: + +scripts/ktrace --speed 166 --map /usr/src/linux/System.map > output.txt + +you should get something like this in output.txt: + +MHZ: 166. +read 4420 lines from System.map. +calibration done, estimated measurement latency: 0.34 microseconds. + +c01299ca put_unused_buffer_head + (0.90) +c011232b wake_up +<13/f0> (1.48) +c0129a26 get_more_buffer_heads + (0.61) +c012880f get_hash_table +<13/c0> (1.34) +c01296ca __brelse + (97.15) +c0129345 set_writetime + (0.11) +c0129398 refile_buffer +<10/334> (0.36) +[...] + +By default, all of the kernel except for init_task, the profiler and +xkdebug is traced. This can lead to a very busy trace file, full of +low level routines. To turn off tracing for a directory and all its +subdirectories, add the line + + override CFLAGS := $(CFLAGS:%-pg=%-g -c) + +to the relevant Makefile, before Rules.make. Delete the *.o files you +want to recompile and make zImage/modules. + +ktrace can get an exclusive lock on /proc/trace before reading it. +This allows ktrace to be suspended until an event occurs. For example, + +* User written program gets exclusive lock on /proc/trace, waits for + event to occur. + +* After starting above program, user runs ktrace with -l or --lock + options which suspends on the lock. + +* User written program detects the desired event, releases the lock. + +* ktrace runs, the resulting trace is as close to the event as + scheduling will allow. + +Sometimes you cannot read /proc/trace directly, typically because the +system is dead and ktrace cannot be run. If it is still responding to +the Magic-SysRQ key (you did select that option didn't you?) then +SysRQ-g dumps syslog and /proc/trace to all consoles, the latter is in +hex. Capture the output via a serial console on another machine +(another useful debugging option). 
+ +After your dead machine has been restarted, take the captured hex dump +of /proc/trace and feed it to ktrace with the option "-d filename" or +"--dump filename". The lock option is ignored when reading a dumped +ktrace. + +Have fun, mail mingo@pc5829.hil.siemens.at if problems. + +Updated by: Mike Galbraith mikeg@weiden.de + +map option, dump option and kernel integration by Keith Owens . Index: linux/Documentation/sysrq.txt diff -u linux/Documentation/sysrq.txt:1.1.1.1 linux/Documentation/sysrq.txt:1.1.3.1 --- linux/Documentation/sysrq.txt:1.1.1.1 Mon Jan 18 02:29:30 1999 +++ linux/Documentation/sysrq.txt Sun Jan 24 20:07:30 1999 @@ -1,6 +1,7 @@ MAGIC SYSRQ KEY DOCUMENTATION v1.2 ------------------------------------ [Sat May 16 01:09:21 EDT 1998] + [Fri May 22 21:33:06 EST 1998 - add dumploGs, Oops] * What is the magic SysRQ key? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -33,7 +34,7 @@ 'b' - Will immediately reboot the system without syncing or unmounting your disks. -'o' - Will shut your system off via APM (if configured and supported). +'f' - Will shut your system off via APM (if configured and supported). 's' - Will attempt to sync all mounted filesystems. @@ -58,6 +59,10 @@ 'l' - Send a SIGKILL to all processes, INCLUDING init. (Your system will be non-functional after this.) +'g' - Dumps log files to all registered consoles. + +'o' - Force an Oops. + * Okay, so what can I use them for? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Well, un'R'aw is very handy when your X server or a svgalib program crashes. @@ -87,6 +92,12 @@ t'E'rm and k'I'll are useful if you have some sort of runaway process you are unable to kill any other way, especially if it's spawning other processes. + +dumplo'G's is useful when the system is hung and you want to see the +log files. It is a good idea to have a serial console assigned to +capture the result. + +'O'ops forces an oops so you can get a kernel backtrace. * Sometimes SysRQ seems to get 'stuck' after using it, what can I do? 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Index: linux/Documentation/xkdebug.txt diff -u /dev/null linux/Documentation/xkdebug.txt:1.1.3.1 --- /dev/null Sat Apr 24 02:49:30 1999 +++ linux/Documentation/xkdebug.txt Sun Jan 24 20:07:30 1999 @@ -0,0 +1,133 @@ +XKDEBUG, a kernel debugging interface for gdb. + + +******* Please read debugging.txt first. ******* + + +WHAT IT DOES + +Xkdebug uses gdb's "remote debugging" interface to allow symbolic +debugging of a running kernel. Gdb behaves as if it is passing +commands to a remote machine, but in fact, those commands are executed +in the kernel on another process. + +Key features: + + -- Kernel data structures can be read and modified using the usual + gdb commands for manipulating program variables. + + -- The debugger state can be initialized using a kernel trap report, + or with the current state of a process that is blocked in the + kernel. + + -- Breakpoints can be set in kernel code to observe the status of + the world at a particular point of time. + + -- Kernel code can be single-stepped. + + -- Kernel functions can be invoked using the gdb "call" command, and + can be used in expressions. + + -- Detects breakpoints that are invoked with interrupts disabled and + automatically removes them. These used to hang the machine. + + -- Requires only a single computer. + + +INSTALLATION + +If you are reading this, you have probably already applied the patch to +your kernel, now set the options and rebuild. Under Kernel Hacking, +say Y to Kernel debugging support then Y to Kernel breakpoints and +single stepping - xkdebug. Make dep clean, recompile, install the new +kernel and modules, reboot. + +The kernel patch compiles everything with "-g" and frame-pointers. The +"-g" option makes the kernel and *.o files much larger, they now +contain debugging symbols. Frame pointers mean that GCC cannot make +use of the EBP register, so you get a less-efficient kernel on your +register-poor x86. 
You should therefore not run a debugging kernel in +production use. (Unless, of course, you want to debug it :-). + +The kernel patch builds and install the xkdebug module. xkgdb in the +linux/script directory loads the xkdebug module, creates a device file +for talking to gdb, fires up gdb, and loads the kernel symbol table and +module symbol tables. + +Kdebug was written by David Hinds and +modified by John Heidemann to support single-stepping +and breakpointing. Most of the credit should therefore go to David, +while John takes responsibility for most of the bugs. + +Changes by Keith Owens to upgrade to 2.1.55 and to +make some attempt to avoid deadlocks when the kernel is running +disabled. All complaints on deadlock avoidance to Keith Owens. + +Upgrade to 2.1.74 for new signal semantics. Add exception checking for +kernel addresses. Integrate ktrace, xkdebug into the kernel. +Documentation updates. Keith Owens . + + + +TECHNICAL WARNINGS + +This kernel debugger should be considered a danger to your machine. It +is all too easy to deadlock a system by setting breakpoints. For +example, + + Setting a breakpoint in console handling when gdb wants to talk to + the console will deadlock your system. + + Debugging network code over a network or X11 is another good way to + kill a system. + + Putting a breakpoint in low level, heavily used common code is a Bad + Idea (TM). For example, I have hung with breakpoints on sync, + fs/namei.c get_page() and almost anywhere in memory management. + +In most cases, the only way out is the big red button, hope you like +running fsck :). Unmounting inactive filesystems or mounting them as +readonly helps a bit. + +The kernel is multi-threaded. The debugger (or at least the stub) is +not. If multiple processes stack up on one breakpoint, I make no +guarantees. + +The moment a breakpoint or single step mode is triggered and gdb takes +over, one of the first things it does is remove all current +breakpoints. 
When you "continue", the breakpoints are reinstated. In +most cases you will get the first task that hits a breakpoint and the +rest of the kernel will run fine, even if other kernel tasks run the +code you are debugging. Only if two tasks hit a breakpoint before gdb +has a chance to remove the breakpoint are there likely to be any +problems. + +The debugger also substantially changes the kernel's multithreading +synchronization. Most uniprocessor Unix kernels are not pre-emptable +when in kernel mode. The authors write code assuming that context switches +occur only when they do things which might sleep. The kernel will +sleep a process on a breakpoint, so if you put one in the middle +of code the author thought was ``atomic'' you can break things. Big +things. Make sure you didn't introduce the bug before you report it. + + +OBTAINING XKDEBUG + +New versions of ktrace-xkdebug are at ftp://ftp.ocs.com.au/pub. File +name patch-ikd-n.n.n.gz. Be gentle, it is only a 28.8K line. + +Keith Owens + PGP 917/C817FEC9 + Fingerprint 2B 25 0A 31 02 AE CA F7 73 0C 28 69 4A 7B 65 27 + +Old versions of xkdebug can be obtained by http from +. +If kdebug and xkdebug merge, another location may become +``authoritative'', but I'll keep a pointer here. 
+ +Xkdebug is a modification of kdebug-1.2, +available from Index: linux/arch/alpha/config.in diff -u linux/arch/alpha/config.in:1.1.1.3 linux/arch/alpha/config.in:1.1.3.2 --- linux/arch/alpha/config.in:1.1.1.3 Thu Feb 4 15:38:47 1999 +++ linux/arch/alpha/config.in Tue Feb 23 22:52:33 1999 @@ -287,4 +287,7 @@ fi bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ + +source kernel/debug/Config.in + endmenu Index: linux/arch/alpha/kernel/entry.S diff -u linux/arch/alpha/kernel/entry.S:1.1.1.3 linux/arch/alpha/kernel/entry.S:1.1.3.3 --- linux/arch/alpha/kernel/entry.S:1.1.1.3 Thu Feb 4 15:38:48 1999 +++ linux/arch/alpha/kernel/entry.S Tue Feb 23 22:52:33 1999 @@ -115,6 +115,17 @@ ldq $28,144($30); \ addq $30,184,$30 +/* + * Conditionally do profiling + */ +#ifdef CONFIG_TRACER +#define CALL_MCOUNT \ + lda $28,_mcount; \ + jsr $28,($28),_mcount +#else +#define CALL_MCOUNT +#endif + .text .set noat #if defined(__linux__) && !defined(__ELF__) @@ -137,6 +148,8 @@ .ent entMM entMM: SAVE_ALL + ldq $8,current_set + CALL_MCOUNT /* save $9 - $15 so the inline exception code can manipulate them. 
*/ subq $30,56,$30 stq $9,0($30) @@ -385,6 +398,11 @@ .ent entUna entUna: lda $30,-256($30) +#ifdef CONFIG_TRACER + stq $8,64($30) + ldq $8,current_set +#endif + CALL_MCOUNT stq $0,0($30) ldq $0,256($30) /* get PS */ stq $1,8($30) @@ -396,7 +414,10 @@ stq $5,40($30) stq $6,48($30) stq $7,56($30) +#ifndef CONFIG_TRACER stq $8,64($30) + ldq $8,current_set +#endif stq $9,72($30) stq $10,80($30) stq $11,88($30) @@ -458,8 +479,13 @@ .ent entUnaUser entUnaUser: ldq $0,0($30) /* restore original $0 */ +#ifdef CONFIG_TRACER + ldq $8,64($30) +#endif lda $30,256($30) /* pop entUna's stack frame */ + SAVE_ALL /* setup normal kernel stack */ + ldq $8,current_set lda $30,-56($30) stq $9,0($30) stq $10,8($30) @@ -591,6 +617,7 @@ beq $4,restore_all bne $5,signal_return restore_all: + CALL_MCOUNT RESTORE_ALL rti Index: linux/arch/alpha/lib/Makefile diff -u linux/arch/alpha/lib/Makefile:1.1.1.2 linux/arch/alpha/lib/Makefile:1.1.3.1 --- linux/arch/alpha/lib/Makefile:1.1.1.2 Mon Jan 18 14:35:50 1999 +++ linux/arch/alpha/lib/Makefile Sun Jan 24 20:06:27 1999 @@ -10,6 +10,10 @@ csum_ipv6_magic.o strcasecmp.o semaphore.o \ srm_dispatch.o srm_fixup.o srm_puts.o srm_printk.o +ifeq ($(CONFIG_KERNEL_DEBUGGING),y) + OBJS += _mcount.o +endif + lib.a: $(OBJS) $(AR) rcs lib.a $(OBJS) Index: linux/arch/alpha/mm/fault.c diff -u linux/arch/alpha/mm/fault.c:1.1.1.1 linux/arch/alpha/mm/fault.c:1.1.3.1 --- linux/arch/alpha/mm/fault.c:1.1.1.1 Mon Jan 18 02:29:00 1999 +++ linux/arch/alpha/mm/fault.c Sun Jan 24 20:06:28 1999 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -136,6 +137,8 @@ } } + prof_trap_entry(); + down(&mm->mmap_sem); lock_kernel(); vma = find_vma(mm, address); @@ -199,5 +202,6 @@ do_exit(SIGKILL); out: unlock_kernel(); + prof_trap_exit(); } Index: linux/arch/i386/config.in diff -u linux/arch/i386/config.in:1.1.1.4 linux/arch/i386/config.in:1.1.3.3 --- linux/arch/i386/config.in:1.1.1.4 Thu Feb 4 15:38:56 1999 +++ linux/arch/i386/config.in Tue Mar 23 01:19:43 
1999 @@ -197,5 +197,22 @@ #bool 'Debug kmalloc/kfree' CONFIG_DEBUG_MALLOC bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ + +source kernel/debug/Config.in + +# arch specific debugging options +if [ "$CONFIG_KERNEL_DEBUGGING" = "y" ]; then + if [ "$CONFIG_MODULES" = "y" ]; then + bool ' Xkdebug, kernel breakpoints and single stepping' CONFIG_XKDEBUG n + fi + bool ' Print %eip to resolve symbols from locks' CONFIG_PRINT_EIP n + bool ' Kernel memory leak detection support' CONFIG_MEMLEAK n + if [ "$CONFIG_SMP" = "y" ]; then + bool ' SMP-IOAPIC NMI Software Watchdog' CONFIG_NMI_WATCHDOG + if [ "$CONFIG_NMI_WATCHDOG" = "y" ]; then + int ' watchdog source IRQ' CONFIG_NMI_WATCHDOG_IRQ 0 + fi + fi +fi endmenu Index: linux/arch/i386/kernel/Makefile diff -u linux/arch/i386/kernel/Makefile:1.1.1.2 linux/arch/i386/kernel/Makefile:1.1.3.2 --- linux/arch/i386/kernel/Makefile:1.1.1.2 Sat Jan 23 17:25:25 1999 +++ linux/arch/i386/kernel/Makefile Wed Mar 31 16:10:57 1999 @@ -7,6 +7,9 @@ # # Note 2! The CFLAGS definitions are now in the main makefile... +MOD_SUB_DIRS := debug +ALL_SUB_DIRS := debug + .S.o: $(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c $< -o $*.o @@ -52,5 +55,19 @@ head.o: head.S $(TOPDIR)/include/linux/tasks.h $(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c $*.S -o $*.o + +# Not safe to have tracing turned on in the init_task. That way lies deadlock. 
+ifeq ($(CONFIG_KERNEL_DEBUGGING),y) +init_task.o: init_task.c $(TOPDIR)/include/linux/sched.h + $(CC) $(CFLAGS:%-pg=%-g -c) $(EXTRA_CFLAGS) -c -o $@ $< +endif + +# avoid the nmi irq to happen inside a spin_lock_irq in the mcount suff +ifeq ($(CONFIG_DEBUG_MCOUNT),y) +ifeq ($(CONFIG_NMI_WATCHDOG),y) +traps.o: traps.c + $(CC) $(CFLAGS:%-pg=%-g -c) $(EXTRA_CFLAGS) -c -o $@ $< +endif +endif include $(TOPDIR)/Rules.make Index: linux/arch/i386/kernel/entry.S diff -u linux/arch/i386/kernel/entry.S:1.1.1.1 linux/arch/i386/kernel/entry.S:1.1.3.2 --- linux/arch/i386/kernel/entry.S:1.1.1.1 Mon Jan 18 02:28:57 1999 +++ linux/arch/i386/kernel/entry.S Tue Mar 23 16:31:53 1999 @@ -42,6 +42,7 @@ #include #include +#include #include #define ASSEMBLY #include @@ -170,6 +171,13 @@ pushl %eax # save orig_eax SAVE_ALL GET_CURRENT(%ebx) +#ifdef CONFIG_DEBUG_MCOUNT + pushl %eax + pushl %ebx + call SYMBOL_NAME(mcount) + popl %ebx + popl %eax +#endif cmpl $(NR_syscalls),%eax jae badsys testb $0x20,flags(%ebx) # PF_TRACESYS @@ -180,6 +188,11 @@ .globl ret_from_sys_call .globl ret_from_intr ret_from_sys_call: +#ifdef CONFIG_DEBUG_MCOUNT + pushl %eax + call SYMBOL_NAME(mcount) + popl %eax +#endif movl SYMBOL_NAME(bh_mask),%eax andl SYMBOL_NAME(bh_active),%eax jne handle_bottom_half @@ -197,16 +210,35 @@ testl $(VM_MASK),EFLAGS(%esp) movl %esp,%eax jne v86_signal_return +#ifndef CONFIG_KERNEL_DEBUGGING xorl %edx,%edx +#else + pushl $0 + pushl %eax +#endif call SYMBOL_NAME(do_signal) +#ifdef CONFIG_KERNEL_DEBUGGING + addl $8,%esp +#endif jmp restore_all ALIGN v86_signal_return: +#ifdef CONFIG_KERNEL_DEBUGGING + pushl %eax +#endif call SYMBOL_NAME(save_v86_state) movl %eax,%esp +#ifndef CONFIG_KERNEL_DEBUGGING xorl %edx,%edx +#else + pushl $0 + pushl %eax +#endif call SYMBOL_NAME(do_signal) +#ifdef CONFIG_KERNEL_DEBUGGING + addl $8,%esp +#endif jmp restore_all ALIGN @@ -300,9 +332,21 @@ jmp error_code ENTRY(nmi) +#ifdef CONFIG_NMI_WATCHDOG + pushl %eax + SAVE_ALL + movl %esp,%edx +#endif pushl 
$0 +#ifndef CONFIG_NMI_WATCHDOG pushl $ SYMBOL_NAME(do_nmi) jmp error_code +#else + pushl %edx + call SYMBOL_NAME(do_nmi) + addl $8,%esp + RESTORE_ALL +#endif /* CONFIG_NMI_WATCHDOG */ ENTRY(int3) pushl $0 Index: linux/arch/i386/kernel/io_apic.c diff -u linux/arch/i386/kernel/io_apic.c:1.1.1.5 linux/arch/i386/kernel/io_apic.c:1.1.3.5 --- linux/arch/i386/kernel/io_apic.c:1.1.1.5 Sat Apr 17 16:21:26 1999 +++ linux/arch/i386/kernel/io_apic.c Sat Apr 24 02:48:05 1999 @@ -21,6 +21,11 @@ */ #define IO_APIC_BASE ((volatile int *)fix_to_virt(FIX_IO_APIC_BASE)) +#ifdef CONFIG_NMI_WATCHDOG +int nmi_pin = -1; +const int nmi_irq = CONFIG_NMI_WATCHDOG_IRQ; +#endif + /* * The structure of the IO-APIC: */ @@ -327,6 +332,19 @@ } /* + * Unclear documentation on what a "conforming ISA interrupt" means. + * + * Should we, or should we not, take the ELCR register into account? + * It's part of the EISA specification, but maybe it should only be + * used if the interrupt is actually marked as EISA? + * + * Oh, well. Don't do it until somebody tells us what the right thing + * to do is.. + */ +#undef USE_ELCR_TRIGGER_LEVEL +#ifdef USE_ELCR_TRIGGER_LEVEL + +/* * EISA Edge/Level control register, ELCR */ static int __init EISA_ELCR(unsigned int irq) @@ -622,6 +640,18 @@ if (!IO_APIC_IRQ(irq)) continue; +#ifdef CONFIG_NMI_WATCHDOG + if (irq == nmi_irq) { + entry.delivery_mode = 4; /* broadcast NMI */ + make_8259A_irq(irq); + /* + * Remember which register has the NMI IRQ entry, + * so we can turn it off in case there is some + * screwup + */ + nmi_pin = pin; + } +#endif entry.vector = assign_irq_vector(irq); @@ -1173,6 +1203,10 @@ * 0x80, because int 0x80 is hm, kind of importantish. 
;) */ for (i = 0; i < NR_IRQS ; i++) { +#ifdef CONFIG_NMI_WATCHDOG + if (i == nmi_irq) + continue; +#endif if (IO_APIC_VECTOR(i) > 0) { if (IO_APIC_irq_trigger(i)) irq_desc[i].handler = &ioapic_level_irq_type; @@ -1214,6 +1248,12 @@ { int pin1, pin2; +#ifdef CONFIG_NMI_WATCHDOG + if (nmi_pin == -1) + printk(".. NMI watchdog has invalid source IRQ.\n"); + else if (nmi_irq != -1) + printk("NMI Watchdog activated on source IRQ %d\n", nmi_irq); +#endif pin1 = find_timer_pin(mp_INT); pin2 = find_timer_pin(mp_ExtINT); enable_IO_APIC_irq(0); @@ -1248,6 +1288,10 @@ } } printk(" works.\n"); +#ifdef CONFIG_NMI_WATCHDOG + if ((nmi_pin != -1) && (nmi_irq == 0)) + printk("NMI Watchdog disabled (source IRQ was 0)!\n"); +#endif } } Index: linux/arch/i386/kernel/irq.c diff -u linux/arch/i386/kernel/irq.c:1.1.1.4 linux/arch/i386/kernel/irq.c:1.1.3.3 --- linux/arch/i386/kernel/irq.c:1.1.1.4 Sat Apr 17 16:21:26 1999 +++ linux/arch/i386/kernel/irq.c Sat Apr 24 02:48:05 1999 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -805,6 +806,10 @@ kstat.irqs[cpu][irq]++; irq_desc[irq].handler->handle(irq, ®s); +#ifdef CONFIG_DEBUG_MCOUNT + mcount(); +#endif + /* * This should be conditional: we should really get * a return code from the irq handler to tell us @@ -815,6 +820,10 @@ if (bh_active & bh_mask) do_bottom_half(); } + +#ifdef CONFIG_DEBUG_MCOUNT + mcount(); +#endif } int setup_x86_irq(unsigned int irq, struct irqaction * new) Index: linux/arch/i386/kernel/smp.c diff -u linux/arch/i386/kernel/smp.c:1.1.1.7 linux/arch/i386/kernel/smp.c:1.1.3.5 --- linux/arch/i386/kernel/smp.c:1.1.1.7 Sat Apr 17 16:21:27 1999 +++ linux/arch/i386/kernel/smp.c Sat Apr 24 02:48:05 1999 @@ -1687,8 +1687,10 @@ * updated with atomic operations). 
This is especially * useful with a profiling multiplier != 1 */ +#ifndef CONFIG_PROFILE_GCC if (!user_mode(regs)) x86_do_profile(regs->eip); +#endif if (!--prof_counter[cpu]) { int user=0,system=0; @@ -1751,8 +1753,17 @@ * [ if a single-CPU system runs an SMP kernel then we call the local * interrupt as well. Thus we cannot inline the local irq ... ] */ +#ifdef CONFIG_NMI_WATCHDOG +atomic_t apic_timer_irqs [NR_CPUS] = { ATOMIC_INIT(0), }; +#endif void smp_apic_timer_interrupt(struct pt_regs * regs) { +#ifdef CONFIG_NMI_WATCHDOG + /* + * the only thing that can lock an NMI is an unACK-ed APIC ... + */ + atomic_inc(apic_timer_irqs+smp_processor_id()); +#endif /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow, and we Index: linux/arch/i386/kernel/time.c diff -u linux/arch/i386/kernel/time.c:1.1.1.5 linux/arch/i386/kernel/time.c:1.1.3.3 --- linux/arch/i386/kernel/time.c:1.1.1.5 Wed Mar 24 01:43:46 1999 +++ linux/arch/i386/kernel/time.c Thu Mar 25 02:27:02 1999 @@ -369,8 +369,10 @@ * system, in that case we have to call the local interrupt handler. */ #ifndef __SMP__ +#ifndef CONFIG_PROFILE_GCC if (!user_mode(regs)) x86_do_profile(regs->eip); +#endif #else if (!smp_found_config) smp_local_timer_interrupt(regs); Index: linux/arch/i386/kernel/traps.c diff -u linux/arch/i386/kernel/traps.c:1.1.1.3 linux/arch/i386/kernel/traps.c:1.1.3.11 --- linux/arch/i386/kernel/traps.c:1.1.1.3 Sat Feb 20 16:38:05 1999 +++ linux/arch/i386/kernel/traps.c Wed Mar 31 16:20:02 1999 @@ -2,6 +2,8 @@ * linux/arch/i386/traps.c * * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1998, Ingo Molnar, added NMI-Watchdog driver */ /* @@ -20,6 +22,8 @@ #include #include #include +#include /* NMI oopser watchdog */ +#include /* mcount debugger */ #ifdef CONFIG_MCA #include @@ -117,9 +121,65 @@ * segments. VMALLOC_OFFSET comes from mm/vmalloc.c; MODULE_RANGE is * a guess of how much space is likely to be vmalloced. 
*/ +#ifndef CONFIG_MEMLEAK #define VMALLOC_OFFSET (8*1024*1024) +#endif #define MODULE_RANGE (8*1024*1024) +#ifdef CONFIG_TRACE +inline void print_call_trace_exact (struct pt_regs * regs) +{ + int i=1; + unsigned long *this_stack, *prev_stack, prev_addr, *prev_bp, framesize; + + printk("\nCall Trace: "); + + /* + * the stack layout: /----- *this_stack + * V + * [this_frame][prev_bp][prev_addr][prev_frame][...] + */ + + /* + * FIXME: i'm ruthlessly abusing linear mapping on i386 + */ + + this_stack = (unsigned long *) regs->ebp; + framesize=0; + + while (((long) this_stack & 4095) != 0) { + prev_addr = *(this_stack+1); + + if (i && ((i % 8) == 0)) + printk("\n "); + /* ksymoops expects [] */ + printk("[<%08lx>] (%lu) ", prev_addr, framesize); + i++; + + prev_bp = (unsigned long *)(*this_stack); + prev_stack = this_stack; + this_stack = prev_bp; + + if ( + ((unsigned long)this_stack<0xc0000000UL) + + || i>100 ) { + + if ((unsigned long)this_stack<0xc0000000UL) + break; + + printk("WARNING: something fishy with the stack frame?\n"); + printk("this_stack: [<%08lx>]\n", + (unsigned long)this_stack); + printk("i: %d.\n", i); + break; + } + framesize = (unsigned long)this_stack-(unsigned long)prev_stack; + } + print_emergency_trace(); +} +#endif /* CONFIG_TRACE */ + static void show_registers(struct pt_regs *regs) { int i; @@ -161,6 +221,14 @@ printk("\n "); printk("%08lx ", *stack++); } + +/* + * If tracing is switched on then we can walk the stack frame. Otherwise we + * can only guess. + */ +#ifdef CONFIG_TRACE + print_call_trace_exact(regs); +#else printk("\nCall Trace: "); stack = (unsigned long *) esp; i = 1; @@ -186,6 +254,7 @@ i++; } } +#endif /* CONFIG_TRACE */ printk("\nCode: "); for(i=0;i<20;i++) printk("%02x ", ((unsigned char *)regs->eip)[i]); @@ -205,6 +274,24 @@ do_exit(SIGSEGV); } +#ifdef CONFIG_XKDEBUG +int kernel_default_int_hook(struct pt_regs * regs, long error_code, int intr) +{ + /* Indicate that the interrupt has not been handled. 
*/ + return 0; +} + +/* + * To allow multiple people (i.e., at least kdebug and kitrace) + * to intercept kernel interrupts, we allow a chain of hooks. + * A handler should return non-zero if the interrupt has accounted for. + */ +int (*kernel_int1_hook)(struct pt_regs * regs, long error_code, int intr) = kernel_default_int_hook; +int (*kernel_int3_hook)(struct pt_regs * regs, long error_code, int intr) = kernel_default_int_hook; + + +#endif /* CONFIG_XKDEBUG */ + static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) { if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs)) @@ -226,7 +313,30 @@ } DO_VM86_ERROR( 0, SIGFPE, "divide error", divide_error, current) +#ifdef CONFIG_XKDEBUG +asmlinkage void do_int3(struct pt_regs * regs, long error_code) +{ + /* Are we in the kernel and can kdebug handle it? */ + if (!((regs->eflags & VM_MASK) || (3 & regs->xcs) == 3)) { + if ((*kernel_int3_hook)(regs, error_code, 3)) + return; + } + /* No. Reproduce old routine. */ + if (current->flags & PF_PTRACED) { + unsigned long flags; + spin_lock_irqsave(¤t->sigmask_lock, flags); + sigdelset(¤t->blocked, SIGTRAP); + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + } + send_sig(SIGTRAP, current, 1); + current->tss.trap_no = SIGTRAP; + current->tss.error_code = error_code; + die_if_kernel("int3",regs,error_code); +} +#else /* ! 
CONFIG_XKDEBUG */ DO_VM86_ERROR( 3, SIGTRAP, "int3", int3, current) +#endif /* CONFIG_XKDEBUG */ DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow, current) DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds, current) DO_ERROR( 6, SIGILL, "invalid operand", invalid_op, current) @@ -248,6 +358,12 @@ handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); return; } +#ifdef CONFIG_XKDEBUG + if ((regs->xcs & 3) == 0) { + if ((*kernel_int1_hook)(regs, error_code, 1)) + return; + }; +#endif /* CONFIG_XKDEBUG */ die_if_kernel("cache flush denied",regs,error_code); current->tss.error_code = error_code; current->tss.trap_no = 19; @@ -285,6 +401,7 @@ } } +#ifndef CONFIG_NMI_WATCHDOG static void mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); @@ -335,6 +452,73 @@ if (!(reason & 0xc0)) unknown_nmi_error(reason, regs); } +#else /* CONFIG_NMI_WATCHDOG */ + +extern atomic_t nmi_counter; + +/* + * FIXME: we assume here that the NMI came from the IO-APIC. It's a quite safe + * assumption in most cases, but if anyone knows a way to distinguish between + * NMI reasons, please speak up ... [i doubt that the IO-APIC does IO port 0x61 + * correctly] + */ + +extern atomic_t apic_timer_irqs [NR_CPUS]; +extern spinlock_t console_lock; +static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED; + +asmlinkage void do_nmi(struct pt_regs * regs, long error_code) +{ + /* + * the best way to detect wether a CPU has a 'hard lockup' problem + * is to check it's local APIC timer IRQ counts. If they are not + * changing then that CPU has some problem. + * + * as these watchdog NMI IRQs are broadcasted to every CPU, here + * we only have to check the current processor. + * + * since NMIs dont listen to _any_ locks, we have to be extremely + * careful not to rely on unsafe variables. The printk might lock + * up though, so we have to break up console_lock first ... 
+ * [when there will be more tty-related locks, break them up + * here too!] + */ + + static atomic_t last_irq_sums [NR_CPUS] = { ATOMIC_INIT(0), }; + static atomic_t alert_counter [NR_CPUS] = { ATOMIC_INIT(0), }; + + /* + * Since current-> is always on the stack, and we always switch + * the stack NMI-atomically, it's safe to use smp_processor_id(). + */ + int sum, cpu = smp_processor_id(); + + atomic_inc(&nmi_counter); + sum = atomic_read(apic_timer_irqs+cpu); + + if (atomic_read(last_irq_sums+cpu) == sum) { + /* + * Ayiee, looks like this CPU is stuck ... + * wait a few IRQs (5 seconds) before doing the oops ... + */ + atomic_inc(alert_counter+cpu); + if (atomic_read(alert_counter+cpu) == HZ/10) { +#ifdef CONFIG_DEBUG_MCOUNT + extern int sysctl_disable_mcount; + sysctl_disable_mcount = 1; +#endif + spin_lock(&nmi_print_lock); + printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu); + show_registers(regs); + spin_unlock(&nmi_print_lock); + do_exit(SIGSEGV); + } + } else { + atomic_set(last_irq_sums+cpu,sum); + atomic_set(alert_counter+cpu,0); + } +} +#endif /* CONFIG_NMI_WATCHDOG */ /* * Careful - we must not do a lock-kernel until we have checked that the Index: linux/arch/i386/kernel/debug/Makefile diff -u /dev/null linux/arch/i386/kernel/debug/Makefile:1.1.3.1 --- /dev/null Sat Apr 24 02:49:33 1999 +++ linux/arch/i386/kernel/debug/Makefile Sun Jan 24 20:06:37 1999 @@ -0,0 +1,18 @@ +# +# Makefile for the kernel xkdebugger. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +# CONFIG_XKDEBUG is bool but xkdebug can only be used as a module. KAO +ifeq ($(CONFIG_XKDEBUG),y) + MOD_LIST_NAME := MISC_XKDEBUG_MODULES + M_OBJS = xkdebug.o + # Profiling in xkdebug is probably not a good idea. 
+ override CFLAGS := $(CFLAGS:%-pg=%-g -c) +endif + +include $(TOPDIR)/Rules.make Index: linux/arch/i386/kernel/debug/xkdebug.c diff -u /dev/null linux/arch/i386/kernel/debug/xkdebug.c:1.1.3.1 --- /dev/null Sat Apr 24 02:49:33 1999 +++ linux/arch/i386/kernel/debug/xkdebug.c Sun Jan 24 20:06:37 1999 @@ -0,0 +1,710 @@ +/*====================================================================== + + Dummy device for "remote" kernel debugging with gdb + + Copyright (C) 1995 by David Hinds, dhinds@allegro.stanford.edu + Single-step/breakpoint modifications + Copyright (C) 1995 by John Heidemann . + + Upgraded to 2.1.55. Minor bug fixes. Automatic removal of + breakpoints that are invoked with interrupts disabled. + 1997/09/15 Keith Owens + + Upgrade to 2.1.74 for new signal semantics. Add exception checking + for kernel addresses. Move xkdebug.[co] to arch/i386/kernel/. + Documentation updates. + 1997/12/20 Keith Owens + + $ Id: xkdebug.c,v 1.2 1995/10/16 20:53:10 johnh Exp $ + Derived from: + kdebug.c 1.6 1995/06/07 06:05:50 (David Hinds) + +======================================================================*/ + +#include + +#include +#include +#include +#include + +#include + +/* The address checking in this module is back to front from the rest + * of the kernel. We happily accept any data areas from user space + * (gdb). However we have to verify that kernel reads and writes in + * response to gdb requests are valid. It is too easy to pass an + * invalid kernel address, take an exception and leave xkdebug.o still + * loaded. Don't be at all surprised to get log messages like + * "gdb: Exception at [] cr2=x (fixup: xxxxxxxx)". + * I might even check user space addresses one day. KAO. 
+ */ + +#define get_kernel(x,ptr) __get_user(x,ptr) +#define put_kernel(x,ptr) __put_user(x,ptr) + +/*====================================================================*/ + +static ssize_t debug_read (struct file *filp, char *buf, + size_t count, loff_t *ppos); +static ssize_t debug_write (struct file *filp, const char *buf, + size_t count, loff_t *ppos); +static int debug_ioctl(struct inode *inode, struct file *file, + u_int cmd, u_long arg); +static int debug_open(struct inode *inode, struct file *file); +static int debug_release(struct inode *inode, struct file *file); + +static struct file_operations debug_fops = +{ + read: debug_read, + write: debug_write, + ioctl: debug_ioctl, + open: debug_open, + release: debug_release +}; + +/* Major device # for debug pseudo-device */ +static int major_dev = 0; + +/* Device in use? */ +static int is_open = 0; + +static int log_packets = 0; /* report packets as they go by? */ + +/* Input and output buffers */ +#define BUFSIZE 512 +static u_char *inq, *outp, *outq; +static u_char inbuf[BUFSIZE], outbuf[BUFSIZE]; + +/*====================================================================*/ + +#define STACKSIZE 4096 +/* Registers must match gdb's preferences. 
*/ +enum registers { + R_EAX, R_ECX, R_EDX, R_EBX, R_XESP, R_EBP, R_XESI, R_EDI, + R_EIP, R_EFLAGS, R_XCS, R_XSS, R_XDS, R_XES, R_XFS, R_XGS, +}; +#define NREG ((int)R_XGS + 1) + +static int float_stack = 0; +static u_char stack[STACKSIZE]; + +#define EFLAGS_TRACE_FLAG 0x100 +/* Map to convert gdb <-> kernel regs */ +#define KREGS_OFFSET(f) ((char*)(&((struct pt_regs*)NULL)->f)-(char*)NULL) +#define COMPUTE_KREGS_PLACE(kregs,off) (&(((u_char*) (kregs))[off])) +static struct { + int offset; + int size; +} kregs_info[] = { + { KREGS_OFFSET(eax), 4 }, + { KREGS_OFFSET(ecx), 4 }, + { KREGS_OFFSET(edx), 4 }, + { KREGS_OFFSET(ebx), 4 }, + { KREGS_OFFSET(esp), 4 }, + { KREGS_OFFSET(ebp), 4 }, + { KREGS_OFFSET(esi), 4 }, + { KREGS_OFFSET(edi), 4 }, + { KREGS_OFFSET(eip), 4 }, + { KREGS_OFFSET(eflags), 4 }, + { KREGS_OFFSET(xcs), 4 }, + { KREGS_OFFSET(xss), 4 }, + { KREGS_OFFSET(xds), 4 }, + { KREGS_OFFSET(xes), 4 }, + { 0, 4 /* FS is a special case */ }, + { 0, 4 /* GS is a special case */ }, +}; + +static struct wait_queue *kdebug_debugees = NULL; +static struct wait_queue *kdebug_debugger = NULL; +static struct pt_regs *current_debugee_regs = NULL; + +/*====================================================================*/ + +/* If a breakpoint is executed while the cpu is running with interrupts + * disabled or with atomic_bh active, it is not possible to wake up the + * debugger. This used to cause an immediate kernel hang. So we + * maintain our own list of breakpoints and the original opcode. If a + * breakpoint triggers in an unusable spot, restore the original opcode + * and redrive the instruction. + * + * Also if we single step into an unusable position, disable single + * step mode but leave the breakpoints in place. + * + * The kernel does not quite match what gdb thinks is going on but it + * is better than a dead machine. 
KAO + */ + +typedef struct original { + struct list_head head; + caddr_t eip; + u_char state; /* 0 free, 1 breakpoint on, + 2 disabled by interrupt check */ + u_char opcode; +} ORIGINAL; + +ORIGINAL o_base; + +#define INT3 0xcc +static const char INT3str[] = "cc"; + +static ORIGINAL * find_breakpoint(caddr_t eip) +{ + ORIGINAL *p = (ORIGINAL *) o_base.head.next; + while (p != &o_base) { + if (p->eip == eip) + return p; + p = (ORIGINAL *) (p->head.next); + } + return NULL; +} + +static int record_breakpoint(caddr_t eip) +{ + ORIGINAL *p = find_breakpoint(eip); + if (!p) { + p = kmalloc(sizeof(ORIGINAL), GFP_KERNEL); + if (!p) { + printk(KERN_ERR "xkdebug cannot kmalloc new ORIGINAL\n"); + return 1; + } + list_add(&p->head, &o_base.head); + } + p->eip = eip; + p->state = 1; + p->opcode = *((u_char *) eip); + return 0; +} + +static int remove_breakpoint(caddr_t eip, char *msg) +{ + ORIGINAL *p = find_breakpoint(eip); + if (!p) + return 1; + if (p->state != 1) + return 1; + *((u_char *) eip) = p->opcode; + p->state = 2; + printk(KERN_WARNING "xkdebug breakpoint removed at %p, %s\n", eip, msg); + return 0; +} + +/*====================================================================*/ + +static u_char *stack_base = stack; + +static void update_stack(u_long esp) +{ + u_char *addr = (u_char *)esp; + if ((addr < stack_base) || (addr > stack_base+STACKSIZE)) + stack_base = addr-STACKSIZE/2; +} + +static u_char *map_stack(u_char *addr) +{ + if ((addr > stack_base) && (addr < stack_base+STACKSIZE)) + return addr + (stack - stack_base); + else + return addr; +} + +/*====================================================================*/ + +#if 0 + /* + * I take responsibility for breaking call_kernel. + * It will come back. 
+ * --johnh 16-Oct-95 + */ +static u_long call; + +static void call_kernel(void) +{ + /* Extract the target address from the call dummy */ + call = *(u_long *)(reg[R_EIP]+1) + reg[R_EIP] + 5; + + asm("movl %eax, _save\n movl _reg, %eax"); + asm("movl %ecx, _save+4\n movl _reg+4, %ecx"); + asm("movl %edx, _save+8\n movl _reg+8, %edx"); + asm("movl %ebx, _save+12\n movl _reg+12, %ebx"); + asm("movl %esp, _save+16\n movl _reg+16, %esp"); + asm("movl %ebp, _save+20\n movl _reg+20, %ebp"); + asm("movl %esi, _save+24\n movl _reg+24, %esi"); + asm("movl %edi, _save+28\n movl _reg+28, %edi"); + + asm("movl _call, %eax\n call *%eax"); + + asm("movl %eax, _reg\n movl _save, %eax"); + asm("movl %ecx, _reg+4\n movl _save+4, %ecx"); + asm("movl %edx, _reg+8\n movl _save+8, %edx"); + asm("movl %ebx, _reg+12\n movl _save+12, %ebx"); + asm("movl %esp, _reg+16\n movl _save+16, %esp"); + asm("movl %ebp, _reg+20\n movl _save+20, %ebp"); + asm("movl %esi, _reg+24\n movl _save+24, %esi"); + asm("movl %edi, _reg+28\n movl _save+28, %edi"); + + /* Pretend we stopped at the call dummy breakpoint */ + reg[R_EIP] += 6; +} +#endif /* 0 */ + +/*====================================================================*/ + +static u_char hexchars[] = "0123456789abcdef"; + +static int hex(int ch) +{ + if ((ch >= '0') && (ch <= '9')) return (ch-'0'); + if ((ch >= 'a') && (ch <= 'f')) return (ch-'a'+10); + if ((ch >= 'A') && (ch <= 'F')) return (ch-'A'+10); + /* Since noone checks errors, print. 
*/ + printk(KERN_ERR "hex: bogus hex character %c\n", ch); + return -1; +} + +/*====================================================================*/ + +static void put_debug(char ch) +{ + *outq = ch; + outq++; + if (outq == outbuf + BUFSIZE) + outq = outbuf; + if (outp == outq) + printk(KERN_ERR "kdebug: output buffer overflow!\n"); +} + +/*====================================================================*/ + +static void put_packet(char *buf) +{ + u_char checksum; + + if (log_packets) + printk(KERN_DEBUG "put_packet('%s')\n", buf); + put_debug('$'); + checksum = 0; + while (*buf != '\0') { + put_debug(*buf); + checksum += *buf; + buf++; + } + put_debug('#'); + put_debug(hexchars[checksum >> 4]); + put_debug(hexchars[checksum % 16]); +} + +/*====================================================================*/ + +void read_registers(char *dest) +{ + u_char *src, byte; + int i, j, dummy_int; + + for (i = R_EAX; i <= R_XGS; i++) { + switch (i) { + /* FS/GS asm code stolen from _fs() in traps.c */ + case R_XFS: + dummy_int = ({ register unsigned short __res; + __asm__("mov %%fs,%%ax":"=a" (__res):); + __res;}); + src = (u_char*)&dummy_int; + break; + case R_XGS: + dummy_int = ({ register unsigned short __res; + __asm__("mov %%gs,%%ax":"=a" (__res):); + __res;}); + src = (u_char*)&dummy_int; + break; + default: + src = COMPUTE_KREGS_PLACE(current_debugee_regs, kregs_info[i].offset); + break; + }; + for (j = 0; j < 4; j++) { + byte = (j < kregs_info[i].size) ? 
*src++ : 0; + *dest++ = hexchars[(byte >> 4) & 0xf]; + *dest++ = hexchars[byte & 0xf]; + }; + }; + *dest++ = '\0'; +} + +char *write_register(int reg_num, char *src) +{ + int j; + u_char *dest = COMPUTE_KREGS_PLACE(current_debugee_regs, + kregs_info[reg_num].offset); + void *dummy_dest; + + if (reg_num == R_XESP || reg_num == R_XSS || + reg_num == R_XFS || reg_num == R_XGS) { + dest = (u_char*)&dummy_dest; + /* ignore copy */ + /* xxx: should report changes as an error */ + }; + for (j = 0; j < 4; j++) { + if (j < kregs_info[reg_num].size) + *dest++ = (hex(src[0]) << 4) + hex(src[1]); + src += 2; + }; + if (reg_num == R_EIP && *dest == 0xe5e5e5e5) { + float_stack = 1; + if (float_stack) + update_stack(*(u_long*)dest); + }; + return src; +} + +void write_registers(char *src) +{ + int i; + + for (i = R_EAX; i <= R_XGS; i++) { + src = write_register(i, src); + }; +} + +/*====================================================================*/ + +static void handle_packet(void) +{ + char *ptr; + u_char *inp, *start, *addr; + u_char buf[BUFSIZE], byte; + u_long i, len = 0, reg_num; + int enn = 0; /* reply "Enn" for errors */ + + inp = inbuf; + while (*inp != '$') inp++; + + start = ++inp; + for (; *inp != '#'; inp++) ; + *inp = '\0'; + + if (log_packets) + printk(KERN_DEBUG "handle_packet('%s')\n", start); + + put_debug('+'); + if (start[2] == ':') { + put_debug(start[0]); + put_debug(start[1]); + start += 3; + } + + buf[0] = '\0'; + switch (start[0]) { + case '?': /* last signal */ + strcpy(buf, "S05"); + break; + case 'd': /* toggle debug flag */ + break; + case 'g': /* read registers */ + read_registers(buf); + break; + case 'G': /* write registers */ + write_registers(start+1); + strcpy(buf, "OK"); + break; + case 'P': /* write individual register */ + ptr = start + 1; + reg_num = simple_strtoul(ptr, &ptr, 16); + if (*ptr++ != '=') + break; /* mis-formed packet */ + write_register(reg_num, ptr); + strcpy(buf, "OK"); + break; + case 'm': /* read memory */ + enn = 0; + 
ptr = start + 1; + addr = (caddr_t)simple_strtoul(ptr, &ptr, 16); + if (float_stack) + addr = map_stack(addr); + if (*ptr == ',') { + ptr++; + len = simple_strtoul(ptr, &ptr, 16); + for (i = 0; i < len; i++) { + if (get_kernel(byte, (u_char *) (addr+i))) { + enn = 1; + break; + } + buf[i<<1] = hexchars[byte >> 4]; + buf[(i<<1)+1] = hexchars[byte % 16]; + } + } + if (enn) + strcpy(buf, "E01"); + else + buf[len*2] = '\0'; + break; + case 'M': /* write memory */ + ptr = start + 1; + addr = (caddr_t)simple_strtoul(ptr, &ptr, 16); + if (float_stack) + addr = map_stack(addr); + if (*ptr == ',') { + ptr++; + len = simple_strtoul(ptr, &ptr, 16); + if (*ptr == ':') { + ptr++; + if (len == 1) { + if (strncmp(ptr, INT3str, 2) == 0) { + if (record_breakpoint(addr)) + break; + } + else { + ORIGINAL *p = find_breakpoint(addr); + u_char c = (hex(ptr[0])<<4) + hex(ptr[1]); + if (p && p->state == 1 && p->opcode == c) + p->state = 0; /* breakpoint disabled */ + } + } + for (i = 0; i < len; i++, ptr += 2) { + if (put_kernel((u_char) ((hex(ptr[0])<<4) + hex(ptr[1])), + (u_char *) (addr+i))) { + enn = 1; + break; + } + } + if (enn) + strcpy(buf, "E02"); + else + strcpy(buf, "OK"); + } + } + break; + case 'c': /* continue */ + if (stack_base != stack) { + strcpy(buf, "S0b"); + break; + } + wake_up(&kdebug_debugees); + interruptible_sleep_on(&kdebug_debugger); + strcpy(buf, "S05"); + break; + case 's': /* step */ + /* Set the step bit, step, then clear it. */ + current_debugee_regs->eflags |= EFLAGS_TRACE_FLAG; + wake_up(&kdebug_debugees); + interruptible_sleep_on(&kdebug_debugger); + current_debugee_regs->eflags &= ~EFLAGS_TRACE_FLAG; + strcpy(buf, "S05"); + break; + case 'k': /* kill */ + break; + } + put_packet(buf); + + inq = inbuf; +} + +/*====================================================================*/ + +static ssize_t debug_read(struct file *filp, char *buf, + size_t count, loff_t *ppos) +{ + /* Should not reset ppos here but the caller does not reset it so I will. 
+ 2.1.74, gdb 4.16. KAO */ + for (*ppos = 0; *ppos < count; ++*ppos) { + if (outp == outq) break; + put_user(*outp, buf); + buf++; outp++; + if (outp == outbuf+BUFSIZE) + outp = outbuf; + } + return *ppos; +} + +/*====================================================================*/ + +static enum { HEAD, BODY, CSUM1, CSUM2 } state = HEAD; + +static ssize_t debug_write(struct file *filp, const char *buf, + size_t count, loff_t *ppos) +{ + char ch; + + /* Should not reset ppos here but the caller does not reset it so I will. + 2.1.74, gdb 4.16. KAO */ + for (*ppos = 0; *ppos < count; ++*ppos) { + if (inq == inbuf+BUFSIZE) break; + get_user(ch, (unsigned char *) buf); + *inq = ch; + inq++; buf++; + switch (state) { + case HEAD: + if (ch == '$') state = BODY; + break; + case BODY: + if (ch == '#') state = CSUM1; + break; + case CSUM1: + state = CSUM2; + break; + case CSUM2: + state = HEAD; + handle_packet(); + break; + } + } + return *ppos; +} + +/*====================================================================*/ + +static int debug_ioctl(struct inode *inode, struct file *file, + u_int cmd, u_long arg) +{ + return 0; +} + +/*====================================================================*/ + +static void generate_fake_regs(void) +{ + static struct pt_regs fake_regs; + +#if 0 + /* xxx: assume that these values are always constant. */ + reg[R_XESP] = save[R_XESP] = (u_long)(stack+STACKSIZE/2); + asm("movw %xcs, _reg+40"); + asm("movw %xss, _reg+44"); + asm("movw %xds, _reg+48"); + asm("movw %xes, _reg+52"); + asm("movw %xfs, _reg+56"); + asm("movw %xgs, _reg+60"); +#endif /* 0 */ + + /* + * To make gdb happy before the first "continue" + * we need to provide registers. 
+ */ + current_debugee_regs = &fake_regs; +} + + +static int debug_open(struct inode *inode, struct file *file) +{ + if (!suser()) + return -EPERM; + if (is_open) + return -EBUSY; + is_open = 1; + inq = inbuf; + outp = outq = outbuf; + INIT_LIST_HEAD(&o_base.head); + + generate_fake_regs(); + put_packet("S02"); /* sigint---just for variety */ + MOD_INC_USE_COUNT; + return 0; +} + +/*====================================================================*/ + +static int debug_release(struct inode *inode, struct file *file) +{ + struct list_head *lh_old; + ORIGINAL *o_old; + while (!list_empty(&o_base.head)) { + o_old = (ORIGINAL *) (lh_old = o_base.head.next); + if (*((u_char *) o_old->eip) == INT3) + remove_breakpoint(o_old->eip, "last ditch clean up"); + list_del(lh_old); + kfree(o_old); + } + /* + * Wake up anyone sleeping. + * gdb or code above has removed the breakpoints. + */ + wake_up(&kdebug_debugees); + is_open = 0; + MOD_DEC_USE_COUNT; + return 0; +} + +/*====================================================================*/ + +static asmlinkage int (*old_int1_hook)(struct pt_regs * regs, long error_code, int intr); +static asmlinkage int (*old_int3_hook)(struct pt_regs * regs, long error_code, int intr); + +static asmlinkage int kdebug_do_ints(struct pt_regs * regs, long error_code, int intr) +{ + /* Yes. Suspend the trapee and wake up the trapper. */ + + char *msg = NULL; + unsigned long flags; + __save_flags(flags); /* I really want the flags value */ + + /* Avoid hung machines when breakpoint triggers with interrupts disabled + * or in bottom half processing. + */ + if (!(flags & 0x00000200)) + msg = "cli()"; + else if (in_interrupt()) + msg = "interrupt context"; + else if (local_bh_count[smp_processor_id()]) + msg = "bh active"; + + /* xxx: race condition on the global variable current_debugee_regs. 
*/ + current_debugee_regs = regs; +#if 0 + printk(KERN_DEBUG "trap intr %d eip %lx flags %lx msg %s\n", + intr, regs->eip, flags, msg); +#endif + + if (msg) { + caddr_t eip; + if (intr == 1) + eip = (caddr_t) current_debugee_regs->eip; /* same eip */ + else + eip = (caddr_t) current_debugee_regs->eip - 1; /* backup */ + if (intr == 3 && *((u_char *) eip) == INT3) { + if (remove_breakpoint(eip, msg) == 0) { + current_debugee_regs->eip = (long) eip; + return 1; + } + } + else if (intr == 1) { + printk(KERN_WARNING "xkdebug single step mode cancelled, %s\n", msg); + current_debugee_regs->eflags &= ~EFLAGS_TRACE_FLAG; + current_debugee_regs->eip = (long) eip; + if (kdebug_debugger) + wake_up(&kdebug_debugger); + return 1; + } + panic("xkdebug breakpoint when %s, cannot recover", msg); + } + + if (kdebug_debugger) + wake_up(&kdebug_debugger); + sleep_on(&kdebug_debugees); + return 1; /* handled */ +} + +int init_module(void) +{ + major_dev = register_chrdev(0, "xkdebug", &debug_fops); + if (major_dev == 0) { + printk(KERN_ERR "unable to grab device # for debug driver\n"); + return -1; + } + old_int1_hook = kernel_int1_hook; + kernel_int1_hook = kdebug_do_ints; + old_int3_hook = kernel_int3_hook; + kernel_int3_hook = kdebug_do_ints; + return 0; +} + +void cleanup_module(void) +{ + kernel_int1_hook = old_int1_hook; + kernel_int3_hook = old_int3_hook; + if (major_dev != 0) + unregister_chrdev(major_dev, "xkdebug"); +} + +/* + * Local Variables: + * c-basic-offset:4 + * End: + */ Index: linux/arch/i386/mm/fault.c diff -u linux/arch/i386/mm/fault.c:1.1.1.1 linux/arch/i386/mm/fault.c:1.1.3.1 --- linux/arch/i386/mm/fault.c:1.1.1.1 Mon Jan 18 02:28:56 1999 +++ linux/arch/i386/mm/fault.c Sun Jan 24 20:06:39 1999 @@ -4,6 +4,7 @@ * Copyright (C) 1995 Linus Torvalds */ +#include #include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include #include #include @@ -232,6 +234,8 @@ return; } + /* recursion is the curse of the programming classes */ + 
SUSPEND_MCOUNT_PROC(current); if (address < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); else @@ -250,6 +254,9 @@ } die("Oops", regs, error_code); do_exit(SIGKILL); +#ifdef CONFIG_DEBUG_MCOUNT + mcount(); +#endif /* * We ran out of memory, or some other thing happened to us that made Index: linux/arch/i386/mm/init.c diff -u linux/arch/i386/mm/init.c:1.1.1.2 linux/arch/i386/mm/init.c:1.1.3.2 --- linux/arch/i386/mm/init.c:1.1.1.2 Tue Jan 26 19:27:22 1999 +++ linux/arch/i386/mm/init.c Sun Jan 31 03:54:39 1999 @@ -43,6 +43,7 @@ pmd_val(*pmd) = _PAGE_TABLE + __pa(BAD_PAGETABLE); } +#ifndef CONFIG_MEMLEAK /* put these back into pgtable.h */ pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset) { pte_t *pte; @@ -86,6 +87,7 @@ } return (pte_t *) (pmd_page(*pmd) + offset); } +#endif CONFIG_MEMLEAK int do_check_pgt_cache(int low, int high) { Index: linux/arch/sparc64/kernel/smp.c diff -u linux/arch/sparc64/kernel/smp.c:1.1.1.3 linux/arch/sparc64/kernel/smp.c:1.1.3.3 --- linux/arch/sparc64/kernel/smp.c:1.1.1.3 Mon Mar 29 23:36:27 1999 +++ linux/arch/sparc64/kernel/smp.c Tue Mar 30 20:11:14 1999 @@ -528,6 +528,7 @@ static inline void sparc64_do_profile(unsigned long pc) { +#ifndef CONFIG_PROFILE_GCC if (prof_buffer && current->pid) { extern int _stext; @@ -538,6 +539,7 @@ pc = prof_len - 1; atomic_inc((atomic_t *)&prof_buffer[pc]); } +#endif } static unsigned long current_tick_offset; Index: linux/drivers/char/sysrq.c diff -u linux/drivers/char/sysrq.c:1.1.1.1 linux/drivers/char/sysrq.c:1.1.3.1 --- linux/drivers/char/sysrq.c:1.1.1.1 Mon Jan 18 02:28:21 1999 +++ linux/drivers/char/sysrq.c Sun Jan 24 20:07:51 1999 @@ -6,6 +6,8 @@ * * (c) 1997 Martin Mares * based on ideas by Pavel Machek + * Add dumploGs. Keith Owens 12/04/1998. + * Add Oops, changed Off to oFf. Keith Owens 26/04/1998. 
*/ #include @@ -31,7 +33,12 @@ extern void wakeup_bdflush(int); extern void reset_vc(unsigned int); extern int console_loglevel; +extern void syslog_to_console(void); extern struct vfsmount *vfsmntlist; +#ifdef CONFIG_TRACE +#include +extern void ktrace_to_console(void); +#endif /* Send a signal to all user processes */ @@ -83,7 +90,7 @@ machine_restart(NULL); break; #ifdef CONFIG_APM - case 'o': /* O -- power off */ + case 'f': /* F -- power off */ printk("Power off\n"); apm_power_off(); break; @@ -130,6 +137,27 @@ send_sig_all(SIGKILL, 1); orig_log_level = 8; break; + case 'g': /* G -- dump all logs */ +#ifdef CONFIG_TRACE + SUSPEND_MCOUNT_TRACE; /* no point in tracing this section */ +#endif + printk("Dump All Logs\n"); + printk(KERN_INFO "DAL: syslog start\n"); + syslog_to_console(); + printk(KERN_INFO "DAL: syslog end\n"); + /* add any other log dumps here */ +#ifdef CONFIG_TRACE + printk(KERN_INFO "DAL: ktrace start\n"); + ktrace_to_console(); + printk(KERN_INFO "DAL: ktrace end\n"); + RESUME_MCOUNT_TRACE; +#endif + printk("\n"); + break; + case 'o': /* O -- oops */ + printk("Forcing Oops\n"); + *(char *)NULL = '\0'; + break; default: /* Unknown: help */ if (kbd) printk("unRaw "); @@ -139,9 +167,9 @@ #endif printk("Boot " #ifdef CONFIG_APM - "Off " + "oFf " #endif - "Sync Unmount showPc showTasks showMem loglevel0-8 tErm kIll killalL\n"); + "Sync Unmount showPc showTasks showMem loglevel0-8 tErm kIll killalL dumploGs Oops\n"); /* Don't use 'A' as it's handled specially on the Sparc */ } Index: linux/fs/proc/array.c diff -u linux/fs/proc/array.c:1.1.1.7 linux/fs/proc/array.c:1.1.3.4 --- linux/fs/proc/array.c:1.1.1.7 Wed Mar 24 01:48:05 1999 +++ linux/fs/proc/array.c Thu Mar 25 02:27:23 1999 @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -208,9 +209,120 @@ }; struct inode_operations proc_profile_inode_operations = { - &proc_profile_operations, + &proc_profile_operations, }; +#ifdef CONFIG_TRACE + +/* + * This function accesses 
kernel tracer information. The returned data is + * binary: the sampling step and the actual contents of the trace + * ringbuffer. Use of the program 'ktrace' is recommended in order to + * get meaningful info out of these data. + */ +static ssize_t read_trace(struct file *file, char *buf, size_t count, loff_t *ppos) +{ + loff_t p = *ppos, left; + unsigned long flags; + int i; + + SUSPEND_MCOUNT_TRACE; + LOCK_MCOUNT_TRACE(flags); + + /* Calibrate the tracer */ + for (i = 1; i <= TRACE_CALIBRATION_CALLS; ++i) + mcount_internal(-1); + + UNLOCK_MCOUNT_TRACE(flags); + + if (p >= sizeof(*trace_table)) + count = 0; + else if (count > sizeof(*trace_table) - p) + count = sizeof(*trace_table) - p; + + left = copy_to_user(buf, p + (char *)trace_table, count); + + RESUME_MCOUNT_TRACE; + + if (count && left == count) + return -EFAULT; + + *ppos += count - left; + return count - left; +} + +/* + * Writing to /proc/trace resets the counters. Doesnt make much sense + * as it's a ringbuffer, but we do it anyways, it might make sense for + * doing short term traces. + */ + +static ssize_t write_trace(struct file * file, const char * buf, size_t count, loff_t *ppos) +{ + unsigned long flags; + SUSPEND_MCOUNT_TRACE; + LOCK_MCOUNT_TRACE(flags); + memset(trace_table->entries, 0, sizeof(trace_table->entries)); + trace_table->curr_call = CONFIG_TRACE_SIZE-1; + UNLOCK_MCOUNT_TRACE(flags); + RESUME_MCOUNT_TRACE; + return count; +} + +static struct file_operations proc_trace_operations = { + NULL, /* lseek */ + read_trace, + write_trace, +}; + +struct inode_operations proc_trace_inode_operations = { + &proc_trace_operations, +}; + +/* + * Dump the kernel trace table in hex to all registered consoles. + * A method of getting the trace table when all else fails. + * This is a raw dump, the entire table is printed in hex, 80 hex digits + * to a line. Capture the output via a serial console and feed into + * ktrace with the "-d filename" option. 
+ * Not recommended for a large trace table over a slow serial line. + */ +#define TRACE_LINE_WIDTH 80 +void ktrace_to_console(void) +{ + static const char hexchar[] = "0123456789abcdef"; + int i; + unsigned c; + char buf[TRACE_LINE_WIDTH+3], *p; + + SUSPEND_MCOUNT_TRACE; + /* Should LOCK_MCOUNT_TRACE here but that might stop output. + * Live with the risk of dumping garbage. Cannot calibrate + * without the lock, OTOH accurate timing figures are probably + * the least of our worries at this point. + */ + + for (i = 0, p = buf; i < sizeof(*trace_table); ++i) { + /* hex convert inline, 200,000+ calls to vsprintf is slow */ + c = *((unsigned char *)(trace_table)+i); + *p++ = hexchar[c>>4]; + *p++ = hexchar[c&0xf]; + if (p - buf >= TRACE_LINE_WIDTH) { + *p++ = '\n'; + *p++ = '\0'; + console_print(buf); + p = buf; + } + } + if (p != buf) { + *p++ = '\n'; + *p++ = '\0'; + console_print(buf); + } + + RESUME_MCOUNT_TRACE; +} +#endif /* CONFIG_TRACE */ static int get_loadavg(char * buffer) { Index: linux/fs/proc/root.c diff -u linux/fs/proc/root.c:1.1.1.2 linux/fs/proc/root.c:1.1.3.1 --- linux/fs/proc/root.c:1.1.1.2 Mon Jan 18 14:39:15 1999 +++ linux/fs/proc/root.c Sun Jan 24 20:10:08 1999 @@ -21,6 +21,7 @@ #ifdef CONFIG_ZORRO #include #endif +#include /* * Offset of the first process in the /proc root directory.. 
@@ -505,6 +506,22 @@ NULL /* permission */ }; +#ifdef CONFIG_TRACE +static struct proc_dir_entry proc_root_trace = { + PROC_TRACE, 5, "trace", + S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0, + 0, &proc_trace_inode_operations +}; +#endif + +#ifdef CONFIG_MEMLEAK +static struct proc_dir_entry proc_root_memleak = { + PROC_MEMLEAK, 7, "memleak", + S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0, + 0, &proc_memleak_inode_operations +}; +#endif + static struct proc_dir_entry proc_root_loadavg = { PROC_LOADAVG, 7, "loadavg", S_IFREG | S_IRUGO, 1, 0, 0, @@ -728,6 +745,15 @@ proc_register(&proc_root, &proc_root_stram); #endif proc_register(&proc_root, &proc_root_slab); + +#ifdef CONFIG_MEMLEAK + proc_register(&proc_root, &proc_root_memleak); +#endif + +#ifdef CONFIG_TRACE + proc_register(&proc_root, &proc_root_trace); + proc_root_trace.size = sizeof(*trace_table); +#endif if (prof_shift) { proc_register(&proc_root, &proc_root_profile); Index: linux/include/asm-alpha/init.h diff -u linux/include/asm-alpha/init.h:1.1.1.1 linux/include/asm-alpha/init.h:1.1.3.1 --- linux/include/asm-alpha/init.h:1.1.1.1 Mon Jan 18 02:27:22 1999 +++ linux/include/asm-alpha/init.h Sun Jan 24 20:10:24 1999 @@ -1,6 +1,23 @@ #ifndef _ALPHA_INIT_H #define _ALPHA_INIT_H +#include + +#ifdef CONFIG_KERNEL_DEBUGGING +/* + * GCC bug, -pg doesnt mix well with section attribute ... 
:*( + */ +#define __init +#define __initdata +#define __initfunc(__arginit) __arginit +#define __INIT +#define __FINIT +#define __INITDATA + +#else /* !CONFIG_KERNEL_DEBUGGING */ +/* + * no -pg switch, we are cool and use init sections: + */ #define __init __attribute__ ((__section__ (".text.init"))) #define __initdata __attribute__ ((__section__ (".data.init"))) #define __initfunc(__arginit) \ @@ -11,6 +28,8 @@ #define __INIT .section .text.init,"ax" #define __FINIT .previous #define __INITDATA .section .data.init,"a" + +#endif /* CONFIG_KERNEL_DEBUGGING */ #define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES))) Index: linux/include/asm-alpha/profiler.h diff -u /dev/null linux/include/asm-alpha/profiler.h:1.1.3.1 --- /dev/null Sat Apr 24 02:50:12 1999 +++ linux/include/asm-alpha/profiler.h Sun Jan 24 20:10:29 1999 @@ -0,0 +1,49 @@ +#ifndef _LINUX_PROFILER_ASM_H +#define _LINUX_PROFILER_ASM_H + +#include + +#ifdef CONFIG_DEBUG_MCOUNT + +/* + * You've got to define two macros if you port the profiling stuff: + */ + +/* + * [kernel stack overflow profiling] + * + * this says how much kernel stack space is >left<. If this goes + * below a certain threshold then we generate an artificial oops. + * + * we do not assume anything about stack growth direction + */ + +/* Dummy for now. Anybody care to supply code to get the stack size on + * an Alpha? KAO */ +#warning No real support for KSTACK on Alpha +#define get_stack_left() 4095 + +/* + * [kernel tracer] + * + * this macro gets a fast and accurate time and puts it into a 'u32' + * variable. It's used as a tracer timestamp. 
+ */ + +#ifdef CONFIG_TRACE_TIMESTAMP +#define get_profiler_timestamp() \ + ( { \ + register u32 __res; \ + asm volatile ("rpcc %0" : "r="(__res)); \ + __res; \ + } ) + +/* Always u32, even when CONFIG_TRACE_TRUNCTIME */ +typedef u32 profiler_timestamp_t; +#endif /* CONFIG_TRACE_TIMESTAMP */ + +typedef unsigned long profiler_pc_t; + +#endif /* CONFIG_DEBUG_MCOUNT */ + +#endif /* _LINUX_PROFILER_ASM_H */ Index: linux/include/asm-i386/init.h diff -u linux/include/asm-i386/init.h:1.1.1.1 linux/include/asm-i386/init.h:1.1.3.1 --- linux/include/asm-i386/init.h:1.1.1.1 Mon Jan 18 02:27:17 1999 +++ linux/include/asm-i386/init.h Sun Jan 24 20:10:58 1999 @@ -1,6 +1,25 @@ #ifndef _I386_INIT_H #define _I386_INIT_H +#include + +#ifdef CONFIG_KERNEL_DEBUGGING +/* + * GCC bug, -pg doesnt mix well with section attribute ... :*( + * memleak also needs the __init functions to stay resident + * due to ID structs (alloc_struct see mm.h) living in some init code. + */ +#define __init +#define __initdata +#define __initfunc(__arginit) __arginit +#define __INIT +#define __FINIT +#define __INITDATA + +#else +/* + * no -pg switch, we are cool and use init sections: + */ #define __init __attribute__ ((__section__ (".text.init"))) #define __initdata __attribute__ ((__section__ (".data.init"))) #define __initfunc(__arginit) \ @@ -10,6 +29,8 @@ #define __INIT .section ".text.init",#alloc,#execinstr #define __FINIT .previous #define __INITDATA .section ".data.init",#alloc,#write + +#endif #define __cacheline_aligned __attribute__ \ ((__section__ (".data.cacheline_aligned"))) Index: linux/include/asm-i386/pgtable.h diff -u linux/include/asm-i386/pgtable.h:1.1.1.3 linux/include/asm-i386/pgtable.h:1.1.3.3 --- linux/include/asm-i386/pgtable.h:1.1.1.3 Sat Apr 17 16:26:16 1999 +++ linux/include/asm-i386/pgtable.h Sat Apr 24 02:48:08 1999 @@ -205,7 +205,11 @@ * The vmalloc() routines leaves a hole of 4kB between each vmalloced * area for the same reason. 
;) */ +#ifndef CONFIG_MEMLEAK #define VMALLOC_OFFSET (8*1024*1024) +#else +#define VMALLOC_OFFSET (128*1024*1024) +#endif #define VMALLOC_START (((unsigned long) high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) #define VMALLOC_VMADDR(x) ((unsigned long)(x)) #define VMALLOC_END (FIXADDR_START) @@ -399,6 +403,7 @@ #define pte_quicklist (current_cpu_data.pte_quick) #define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) +#ifndef CONFIG_MEMLEAK extern __inline__ pgd_t *get_pgd_slow(void) { pgd_t *ret = (pgd_t *)__get_free_page(GFP_KERNEL), *init; @@ -437,9 +442,6 @@ free_page((unsigned long)pgd); } -extern pte_t *get_pte_slow(pmd_t *pmd, unsigned long address_preadjusted); -extern pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long address_preadjusted); - extern __inline__ pte_t *get_pte_fast(void) { unsigned long *ret; @@ -464,6 +466,126 @@ free_page((unsigned long)pte); } +extern pte_t *get_pte_slow(pmd_t *pmd, unsigned long address_preadjusted); +extern pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long address_preadjusted); +extern int do_check_pgt_cache(int, int); + +#else /* CONFIG_MEMLEAK */ + +#define get_pgd_slow() \ +({ \ + pgd_t *_ret = (pgd_t *) __get_free_page(GFP_KERNEL), *init; \ + if (_ret) { \ + init = pgd_offset(&init_mm, 0); \ + memset (_ret, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); \ + memcpy (_ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, \ + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); \ + } \ + _ret; \ +}) + +#define get_pgd_fast() \ +({ \ + unsigned long *_ret; \ + if((_ret = pgd_quicklist) != NULL) { \ + pgd_quicklist = (unsigned long *)(*_ret); \ + _ret[0] = _ret[1]; \ + pgtable_cache_size--; \ + } else \ + _ret = (unsigned long *)get_pgd_slow(); \ + (pgd_t *)_ret; \ +}) + +#define free_pgd_fast(pgd) \ +({ \ + *(unsigned long *)(pgd) = (unsigned long) pgd_quicklist; \ + pgd_quicklist = (unsigned long *) (pgd); \ + pgtable_cache_size++; \ +}) + +#define free_pgd_slow(pgd) \ +({ \ + free_page((unsigned long)(pgd)); \ +}) 
+ +#define get_pte_slow(pmd, offset) \ +({ \ + pte_t *_ret; \ + unsigned long _pte = (unsigned long) __get_free_page(GFP_KERNEL); \ + if (pmd_none(*((pmd_t *)(pmd)))) { \ + if (_pte) { \ + clear_page(_pte); \ + pmd_val(*((pmd_t *)(pmd))) = _PAGE_TABLE + __pa(_pte); \ + _ret = (pte_t *)(_pte + (unsigned long)(offset)); \ + goto out_get_pte_slow; \ + } \ + pmd_val(*((pmd_t *)(pmd))) = _PAGE_TABLE + __pa(BAD_PAGETABLE); \ + _ret = NULL; \ + goto out_get_pte_slow; \ + } \ + free_page(_pte); \ + if (pmd_bad(*((pmd_t *)(pmd)))) { \ + __bad_pte((pmd_t *)(pmd)); \ + _ret = NULL; \ + goto out_get_pte_slow; \ + } \ + _ret = (pte_t *) (pmd_page(*((pmd_t *)(pmd))) + (unsigned long)(offset)); \ +out_get_pte_slow: \ + _ret; \ +}) + +#define get_pte_kernel_slow(pmd, offset) \ +({ \ + pte_t *_ret, *_pte = (pte_t *) __get_free_page(GFP_KERNEL); \ + if (pmd_none(*((pmd_t *)(pmd)))) { \ + if (_pte) { \ + clear_page((unsigned long)_pte); \ + pmd_val(*((pmd_t *)(pmd))) = _KERNPG_TABLE + __pa(_pte); \ + _ret = _pte + (unsigned long)(offset); \ + goto out_get_pte_kernel_slow; \ + } \ + pmd_val(*((pmd_t *)(pmd))) = _KERNPG_TABLE + __pa(BAD_PAGETABLE); \ + _ret = NULL; \ + goto out_get_pte_kernel_slow; \ + } \ + free_page((unsigned long)_pte); \ + if (pmd_bad(*((pmd_t *)(pmd)))) { \ + __bad_pte_kernel((pmd_t *)(pmd)); \ + _ret = NULL; \ + goto out_get_pte_kernel_slow; \ + } \ + _ret = (pte_t *)(pmd_page(*((pmd_t *)(pmd))) + (unsigned long)(offset)); \ +out_get_pte_kernel_slow: \ + _ret; \ +}) + +#define get_pte_fast() \ +({ \ + unsigned long *_ret; \ + if((_ret = (unsigned long *)pte_quicklist) != NULL) { \ + pte_quicklist = (unsigned long *)(*_ret); \ + _ret[0] = _ret[1]; \ + pgtable_cache_size--; \ + } \ + (pte_t *)_ret; \ +}) + +#define free_pte_fast(pte) \ +({ \ + *(unsigned long *)(pte) = (unsigned long) pte_quicklist; \ + pte_quicklist = (unsigned long *) (pte); \ + pgtable_cache_size++; \ +}) + +#define free_pte_slow(pte) \ +({ \ + free_page((unsigned long)(pte)); \ +}) + +extern 
int do_check_pgt_cache(int, int); + +#endif /* CONFIG_MEMLEAK */ + /* We don't use pmd cache, so these are dummy routines */ extern __inline__ pmd_t *get_pmd_fast(void) { @@ -486,6 +608,7 @@ #define pgd_free(pgd) free_pgd_fast(pgd) #define pgd_alloc() get_pgd_fast() +#ifndef CONFIG_MEMLEAK extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address) { address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); @@ -526,6 +649,59 @@ __bad_pte(pmd); return NULL; } +#else /* CONFIG_MEMLEAK */ +#define pte_alloc_kernel(pmd, address) \ +({ \ + pte_t * _ret; \ + unsigned long _address = ((unsigned long)(address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); \ + if (pmd_none(*((pmd_t *)(pmd)))) { \ + pte_t * _page = (pte_t *) get_pte_fast(); \ + if (!_page) { \ + _ret = get_pte_kernel_slow(((pmd_t *)(pmd)), _address); \ + goto out_pte_alloc_kernel; \ + } \ + pmd_val(*((pmd_t *)(pmd))) = _KERNPG_TABLE + __pa(_page); \ + _ret = _page + _address; \ + goto out_pte_alloc_kernel; \ + } \ + if (pmd_bad(*((pmd_t *)(pmd)))) { \ + __bad_pte_kernel(((pmd_t *)(pmd))); \ + _ret = NULL; \ + goto out_pte_alloc_kernel; \ + } \ + _ret = (pte_t *) pmd_page(*((pmd_t *)(pmd))) + _address; \ +out_pte_alloc_kernel: \ + _ret; \ +}) + +#define pte_alloc(pmd, address) \ +({ \ + pte_t *_ret; \ + unsigned long _address = ((unsigned long)(address) >> (PAGE_SHIFT-2)) & 4*(PTRS_PER_PTE - 1); \ + if (pmd_none(*((pmd_t *)(pmd)))) \ + goto getnew; \ + if (pmd_bad(*((pmd_t *)(pmd)))) \ + goto fix; \ + _ret = (pte_t *) (pmd_page(*((pmd_t *)(pmd))) + _address); \ + goto out_pte_alloc; \ +getnew: \ +{ \ + unsigned long _page = (unsigned long) get_pte_fast(); \ + if (!_page) { \ + _ret = get_pte_slow(((pmd_t *)(pmd)), _address); \ + goto out_pte_alloc; \ + } \ + pmd_val(*((pmd_t *)(pmd))) = _PAGE_TABLE + __pa(_page); \ + _ret = (pte_t *) (_page + _address); \ + goto out_pte_alloc; \ +} \ +fix: \ + __bad_pte(((pmd_t *)(pmd))); \ + _ret = NULL; \ +out_pte_alloc: \ + _ret; \ +}) +#endif /* CONFIG_MEMLEAK */ /* * 
allocating and freeing a pmd is trivial: the 1-entry pmd is @@ -542,8 +718,6 @@ #define pmd_free_kernel pmd_free #define pmd_alloc_kernel pmd_alloc - -extern int do_check_pgt_cache(int, int); extern inline void set_pgdir(unsigned long address, pgd_t entry) { Index: linux/include/asm-i386/profiler.h diff -u /dev/null linux/include/asm-i386/profiler.h:1.1.3.1 --- /dev/null Sat Apr 24 02:50:15 1999 +++ linux/include/asm-i386/profiler.h Sun Jan 24 20:11:06 1999 @@ -0,0 +1,61 @@ +#ifndef _LINUX_PROFILER_ASM_H +#define _LINUX_PROFILER_ASM_H + +#include + +#ifdef CONFIG_DEBUG_MCOUNT + +/* + * You've got to define two macros if you port the profiling stuff: + */ + +/* + * [kernel stack overflow profiling] + * + * this says how much kernel stack space is >left<. If this goes + * below a certain treshold then we generate an artificial oops. + * + * we do not assume anything about stack growth direction + */ + +#define get_stack_left() \ +({ register unsigned long __res; \ + __asm__("movl %%esp, %0" : "=r" (__res)); \ + __res & 0x1fff; \ +}) + +/* + * [kernel tracer] + * + * this macro gets fast an accurate time and puts it into a 'long long' + * variable. It's used as a tracer timestamp. 
+ */ + +#ifdef CONFIG_TRACE_TIMESTAMP +#define get_profiler_timestamp() \ + ( { \ + register u64 __res; \ + if (boot_cpu_data.x86_capability & 0x10) { \ + __asm__ __volatile__( \ + "rdtsc" : "=A"(__res) \ + ); \ + } \ + else { \ + /* no rdtsc, use jiffies instead */ \ + __res = jiffies; \ + } \ + __res; \ + } ) + +#ifdef CONFIG_TRACE_TRUNCTIME +typedef u32 profiler_timestamp_t; +#else +typedef u64 profiler_timestamp_t; +#endif /* CONFIG_TRACE_TRUNCTIME */ +#endif /* CONFIG_TRACE_TIMESTAMP */ + +typedef unsigned long profiler_pc_t; + +#endif /* CONFIG_DEBUG_MCOUNT */ + +#endif /* _LINUX_PROFILER_ASM_H */ Index: linux/include/asm-i386/ptrace.h diff -u linux/include/asm-i386/ptrace.h:1.1.1.1 linux/include/asm-i386/ptrace.h:1.1.3.1 --- linux/include/asm-i386/ptrace.h:1.1.1.1 Mon Jan 18 02:27:15 1999 +++ linux/include/asm-i386/ptrace.h Sun Jan 24 20:11:01 1999 @@ -48,9 +48,19 @@ #define PTRACE_SETFPREGS 15 #ifdef __KERNEL__ +#include + #define user_mode(regs) ((VM_MASK & (regs)->eflags) || (3 & (regs)->xcs)) #define instruction_pointer(regs) ((regs)->eip) extern void show_regs(struct pt_regs *); + +/* Additions for kdebug: + * A chain of interupt handlers. + * Return non-zero when the interrupt has been serviced. + * -johnh + */ +extern asmlinkage int (*kernel_int1_hook)(struct pt_regs * regs, long error_code, int intr); +extern asmlinkage int (*kernel_int3_hook)(struct pt_regs * regs, long error_code, int intr); #endif #endif Index: linux/include/asm-i386/system.h diff -u linux/include/asm-i386/system.h:1.1.1.1 linux/include/asm-i386/system.h:1.1.3.1 --- linux/include/asm-i386/system.h:1.1.1.1 Mon Jan 18 02:27:15 1999 +++ linux/include/asm-i386/system.h Sun Jan 24 20:11:03 1999 @@ -1,18 +1,25 @@ #ifndef __ASM_SYSTEM_H #define __ASM_SYSTEM_H +#include #include #include #ifdef __KERNEL__ struct task_struct; /* one of the stranger aspects of C forward declarations.. 
*/ +#ifndef CONFIG_KERNEL_DEBUGGING /* Fix the FASTCALL thing -arca */ extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next)); +#else +extern void __switch_to(struct task_struct *prev, struct task_struct *next) + __attribute__((stdcall)); +#endif /* * We do most of the task switching in C, but we need * to do the EIP/ESP switch in assembly.. */ +#ifdef CONFIG_KERNEL_DEBUGGING /* we can' t use FASTCALL -arca */ #define switch_to(prev,next) do { \ unsigned long eax, edx, ecx; \ asm volatile("pushl %%ebx\n\t" \ @@ -22,6 +29,8 @@ "movl %%esp,%0\n\t" /* save ESP */ \ "movl %5,%%esp\n\t" /* restore ESP */ \ "movl $1f,%1\n\t" /* save EIP */ \ + "pushl %8\n\t" /* pass args throught the stack */ \ + "pushl %7\n\t" /* pass args throught the stack */ \ "pushl %6\n\t" /* restore EIP */ \ "jmp __switch_to\n" \ "1:\t" \ @@ -34,6 +43,29 @@ :"m" (next->tss.esp),"m" (next->tss.eip), \ "a" (prev), "d" (next)); \ } while (0) +#else /* original */ +#define switch_to(prev,next) do { \ + unsigned long eax, edx, ecx; \ + asm volatile("pushl %%ebx\n\t" \ + "pushl %%esi\n\t" \ + "pushl %%edi\n\t" \ + "pushl %%ebp\n\t" \ + "movl %%esp,%0\n\t" /* save ESP */ \ + "movl %5,%%esp\n\t" /* restore ESP */ \ + "movl $1f,%1\n\t" /* save EIP */ \ + "pushl %6\n\t" /* restore EIP */ \ + "jmp __switch_to\n" \ + "1:\t" \ + "popl %%ebp\n\t" \ + "popl %%edi\n\t" \ + "popl %%esi\n\t" \ + "popl %%ebx" \ + :"=m" (prev->tss.esp),"=m" (prev->tss.eip), \ + "=a" (eax), "=d" (edx), "=c" (ecx) \ + :"m" (next->tss.esp),"m" (next->tss.eip), \ + "a" (prev), "d" (next)); \ +} while (0) +#endif #define _set_base(addr,base) do { unsigned long __pr; \ __asm__ __volatile__ ("movw %%dx,%1\n\t" \ Index: linux/include/linux/kernel.h diff -u linux/include/linux/kernel.h:1.1.1.1 linux/include/linux/kernel.h:1.1.3.2 --- linux/include/linux/kernel.h:1.1.1.1 Mon Jan 18 02:27:02 1999 +++ linux/include/linux/kernel.h Tue Apr 6 23:22:55 1999 @@ -8,6 +8,7 @@ #ifdef __KERNEL__ #include +#include #include 
/* Optimization barrier */ @@ -19,7 +20,11 @@ #define LONG_MAX ((long)(~0UL>>1)) #define ULONG_MAX (~0UL) -#define STACK_MAGIC 0xdeadbeef +#if BITS_PER_LONG < 64 +# define STACK_MAGIC 0xdeadbeef +#else +# define STACK_MAGIC 0xfeedbabedeadbeef +#endif #define KERN_EMERG "<0>" /* system is unusable */ #define KERN_ALERT "<1>" /* action must be taken immediately */ @@ -34,7 +39,7 @@ # define ATTRIB_NORET __attribute__((noreturn)) # define NORET_AND noreturn, -#ifdef __i386__ +#if defined(__i386__) && !defined(CONFIG_KERNEL_DEBUGGING) #define FASTCALL(x) x __attribute__((regparm(3))) #else #define FASTCALL(x) x Index: linux/include/linux/memleak_unwrap.h diff -u /dev/null linux/include/linux/memleak_unwrap.h:1.1.3.1 --- /dev/null Sat Apr 24 02:50:24 1999 +++ linux/include/linux/memleak_unwrap.h Sun Jan 24 20:12:45 1999 @@ -0,0 +1,93 @@ +#ifndef _MM_UNWRAP_H +#define _MM_UNWRAP_H + +#include + +#ifdef CONFIG_MEMLEAK + +#ifdef MEMLEAK_UNWRAP_PAGE +/* mm/page_alloc.c */ +#undef __get_free_pages +#define __get_free_pages(gfp_mask,gfporder) \ + __get_free_pages_wrap((gfp_mask),(gfporder),IDPTR) +#endif /* MEMLEAK_UNWRAP_PAGE */ + +#ifdef MEMLEAK_UNWRAP_SLAB +/* mm/slab.c */ +/* + * NOTE: leave kmem_cache_create wrapped, as otherwise the allocation + * id won't exist for the underlying allocator. Other functions + * which lead to a physical allocation must also pass this id. + * This looks ugly, but causes ownership of the allocation to be + * passed on to the allocation initiator. + * + * freeing of the allocation is the responsibility of the underlying + * allocator. I hope that this helps to keep memleak in sync. 
+ */ + +#undef kmem_cache_alloc +#undef kmalloc + +#define kmem_cache_alloc(cachep,flags) \ + kmem_cache_alloc_wrap((cachep),(flags),IDPTR) + +#define kmalloc(size,priority) \ + kmalloc_wrap((size),(priority),IDPTR) + +#define kmem_cache_grow(cachep,flags) \ + kmem_cache_grow_wrap((cachep),(flags),IDPTR) + +#define __kmem_cache_alloc(cachep,flags) \ + __kmem_cache_alloc_wrap((cachep),(flags),IDPTR) + +#define kmem_cache_slabmgmt(cachep,objp,local_flags) \ + kmem_cache_slabmgmt_wrap((cachep),(objp),(local_flags),IDPTR) +#endif /* MEMLEAK_UNWRAP_SLAB */ + +#ifdef MEMLEAK_UNWRAP_VMALLOC +/* mm/vmalloc.c */ +#undef vmalloc +#undef get_vm_area +#undef alloc_area_pte +#undef alloc_area_pmd +#undef vmalloc_area_pages + +#define get_vm_area(size) get_vm_area_wrap((size),IDPTR) +#define alloc_area_pte(pte, address, size) alloc_area_pte_wrap((pte),(address),(size),IDPTR) +#define alloc_area_pmd(pmd, address, size) alloc_area_pmd_wrap((pmd),(address),(size),IDPTR) +#define vmalloc_area_pages(address, size) vmalloc_area_pages_wrap((address),(size),IDPTR) +#endif /* MEMLEAK_UNWRAP_VMALLOC */ + +#ifdef MEMLEAK_UNWRAP_SKBUFF +/* net/core/skbuff.c */ +#undef alloc_skb +#undef skb_clone +#undef skb_copy +#undef skb_realloc_headroom + +#define alloc_skb(size,gfp_mask) alloc_skb_wrap((size),(gfp_mask),IDPTR) +#define skb_clone(skb,gfp_mask) skb_clone_wrap((skb),(gfp_mask),IDPTR) +#define skb_copy(skb,gfp_mask) skb_copy_wrap((skb),(gfp_mask),IDPTR) +#define skb_realloc_headroom(skb,newheadroom) skb_realloc_headroom_wrap((skb),(newheadroom),IDPTR) +#endif /* MEMLEAK_UNWRAP_SKBUFF */ + +#ifdef MEMLEAK_UNWRAP_SOCK +/* net/core/sock.c */ +#undef sock_wmalloc +#undef sock_rmalloc +#undef sock_kmalloc +#undef sk_alloc + +#define sock_wmalloc(sk,size,force,priority) sock_wmalloc_wrap((sk),(size),(force),(priority),IDPTR) +#define sock_rmalloc(sk,size,force,priority) sock_rmalloc_wrap((sk),(size),(force),(priority),IDPTR) +#define sock_kmalloc(sk,size,priority) 
sock_kmalloc_wrap((sk),(size),(priority),IDPTR) +#define sk_alloc(family,priority,zero_it) sk_alloc_wrap((family),(priority),(zero_it),IDPTR) + +/* include/net/sock.h */ +#undef sock_alloc_send_skb +#define sock_alloc_send_skb(sk,size,fallback,noblock,errcode) \ + sock_alloc_send_skb_wrap((sk),(size),(fallback),(noblock),(errcode),IDPTR) +#endif /* MEMLEAK_UNWRAP_SOCK */ + +#endif /* CONFIG_MEMLEAK */ +#endif /* _MM_UNWRAP_H */ Index: linux/include/linux/mm.h diff -u linux/include/linux/mm.h:1.1.1.4 linux/include/linux/mm.h:1.1.3.2 --- linux/include/linux/mm.h:1.1.1.4 Tue Mar 9 01:55:28 1999 +++ linux/include/linux/mm.h Fri Mar 19 01:23:27 1999 @@ -6,6 +6,7 @@ #ifdef __KERNEL__ +#include #include extern unsigned long max_mapnr; @@ -255,6 +256,7 @@ * goes to clearing the page. If you want a page without the clearing * overhead, just use __get_free_page() directly.. */ +#ifndef CONFIG_MEMLEAK #define __get_free_page(gfp_mask) __get_free_pages((gfp_mask),0) #define __get_dma_pages(gfp_mask, order) __get_free_pages((gfp_mask) | GFP_DMA,(order)) extern unsigned long FASTCALL(__get_free_pages(int gfp_mask, unsigned long gfp_order)); @@ -268,6 +270,101 @@ clear_page(page); return page; } + +#define MEMLEAK_ALLOC(addr) {} +#define MEMLEAK_FREE(addr) {} +#define MEMLEAK_ALLOC_NOLOCK(addr) {} +#define MEMLEAK_FREE_TRUE(expr,addr) {} + +#else +/* + * 'allocation identifier' for memleak detection + */ +struct alloc_struct { + int id; + char *file; + int line; +}; + +#define MEMLEAK_WRAP(x,y...) 
\ +({ \ + static struct alloc_struct MEMLEAKID = { 0, __FILE__, __LINE__ }; \ + x##_wrap(y,&MEMLEAKID); \ +}) + +extern unsigned long memleak_init (unsigned long, unsigned long); +extern int alloc_addr_lock(unsigned long, struct alloc_struct *); +extern int alloc_addr_nolock(unsigned long, struct alloc_struct *); +extern int free_addr(unsigned long); + +#define MEMLEAK_PARANOID 1 +#ifdef MEMLEAK_PARANOID +#define PROBLEM() printk(KERN_ERR "MEMLEAK PROBLEM: <%s,%d>.\n",__FILE__,__LINE__) +#else +#define PROBLEM() {} +#endif /* MEMLEAK_PARANOID */ + +extern unsigned long FASTCALL(__get_free_pages_wrap(int gfp_mask, + unsigned long gfporder, struct alloc_struct *IDPTR)); + +#ifndef MEMLEAK_PASS_ALLOCATION +/* These are for use externally to an allocator. All allocators pass a + * pointer down the stack and map the allocation from inside the alocator, + * and under it's locking mechanism. + */ +#define MEMLEAK_ALLOC(addr) \ +({ \ + if(alloc_addr_lock((unsigned long)(addr),&MEMLEAKID)) \ + PROBLEM(); \ +}) +#else +#define MEMLEAK_ALLOC(addr) \ +({ \ + if(alloc_addr_lock((unsigned long)(addr),IDPTR)) \ + PROBLEM(); \ +}) +#define MEMLEAK_ALLOC_NOLOCK(addr) \ +({ \ + if(alloc_addr_nolock((unsigned long)(addr),IDPTR)) \ + PROBLEM(); \ +}) +#endif /* MEMLEAK_PASS_ALLOCATION */ + +#define MEMLEAK_FREE(addr) \ +({ \ + if(free_addr((unsigned long)(addr))) \ + PROBLEM(); \ +}) +#define MEMLEAK_FREE_TRUE(expr,addr) \ +({ \ + if((expr)) \ + MEMLEAK_FREE((addr)); \ +}) + +/* + * Sometimes, it is useful to disable memleak mapping for a specific file. + * In this case, define MEMLEAK_KILL_ALLOCATION in that file. 
+ */ +#ifndef MEMLEAK_KILL_ALLOCATION +#define __get_free_pages(gfp_mask,gfporder) \ + MEMLEAK_WRAP(__get_free_pages,gfp_mask,gfporder) +#else +#define __get_free_pages(gfp_mask,gfporder) \ + __get_free_pages_wrap((gfp_mask),(gfporder),NULL) +#endif + +#define get_free_page(gfp_mask) \ +({ \ + unsigned long _page; \ + _page = __get_free_pages((gfp_mask),0); \ + if (_page) \ + memset((void *) _page, 0, PAGE_SIZE); \ + _page; \ +}) + +#define __get_free_page(gfp_mask) __get_free_pages((gfp_mask),0) +#define __get_dma_pages(gfp_mask, order) __get_free_pages(((gfp_mask) | GFP_DMA),(order)) +#endif /* CONFIG_MEMLEAK */ extern int low_on_memory; Index: linux/include/linux/proc_fs.h diff -u linux/include/linux/proc_fs.h:1.1.1.4 linux/include/linux/proc_fs.h:1.1.3.4 --- linux/include/linux/proc_fs.h:1.1.1.4 Sat Apr 17 16:27:07 1999 +++ linux/include/linux/proc_fs.h Sat Apr 24 02:48:09 1999 @@ -52,7 +52,9 @@ PROC_STRAM, PROC_SOUND, PROC_MTRR, /* whether enabled or not */ - PROC_FS + PROC_FS, + PROC_TRACE, + PROC_MEMLEAK }; enum pid_directory_inos { @@ -421,6 +423,12 @@ #endif extern struct inode_operations proc_omirr_inode_operations; extern struct inode_operations proc_ppc_htab_inode_operations; +#ifdef CONFIG_TRACE +extern struct inode_operations proc_trace_inode_operations; +#endif +#ifdef CONFIG_MEMLEAK +extern struct inode_operations proc_memleak_inode_operations; +#endif /* * generic.c Index: linux/include/linux/profiler.h diff -u /dev/null linux/include/linux/profiler.h:1.1.3.1 --- /dev/null Sat Apr 24 02:50:25 1999 +++ linux/include/linux/profiler.h Sun Jan 24 20:12:45 1999 @@ -0,0 +1,85 @@ +#ifndef _LINUX_PROFILER_H +#define _LINUX_PROFILER_H + +#include +#include +#include +#include + +#ifdef __KERNEL__ +#ifdef CONFIG_DEBUG_MCOUNT + +extern void mcount (void); +extern int mcount_internal(profiler_pc_t self_addr); +extern atomic_t mcount_ready; /* controls all mcount() processing */ + +#define SUSPEND_MCOUNT atomic_dec(&mcount_ready) +#define RESUME_MCOUNT 
atomic_inc(&mcount_ready) +#define SUSPEND_MCOUNT_PROC(x) ((x)->flags |= PF_NO_MCOUNT) +#define RESUME_MCOUNT_PROC(x) ((x)->flags &= ~PF_NO_MCOUNT) + +#ifdef CONFIG_TRACE + +extern atomic_t mcount_trace_ready; /* controls just mcount() tracing */ +/* + * Protect the profiling table with a spin lock, only one cpu at a + * time. No point in read/write locks, almost all accesses are for + * write. Since this code is accessed from all contexts, use + * spin_lock_irqsave. + */ +extern spinlock_t trace_table_lock; + +/* Note: The hierarchy is mcount_ready, mcount_trace_ready, trace_table_lock */ + +struct trace_entry { + profiler_pc_t pc; +#ifdef CONFIG_TRACE_TIMESTAMP + profiler_timestamp_t timestamp; +#endif +#ifdef CONFIG_TRACE_PID + pid_t pid; +#endif +#if defined(CONFIG_TRACE_CPU) && (defined(__SMP__) || defined(CONFIG_SMP)) + unsigned int cpu; +#endif +}; + +extern struct trace_table { + unsigned int table_size; + unsigned int curr_call; + struct trace_entry entries[CONFIG_TRACE_SIZE]; +} *trace_table; + +/* + * die_if_kernel() uses this to 'extend' the stack trace given in an Oops + * message. You can use this when debugging special code, as a debugging aid. 
+ */ +void print_emergency_trace (void); + +#define TRACE_CALIBRATION_CALLS 20 + +#define SUSPEND_MCOUNT_TRACE atomic_dec(&mcount_trace_ready) +#define RESUME_MCOUNT_TRACE atomic_inc(&mcount_trace_ready) +#define LOCK_MCOUNT_TRACE(x) spin_lock_irqsave(&trace_table_lock, x); +#define UNLOCK_MCOUNT_TRACE(x) spin_unlock_irqrestore(&trace_table_lock, x); + +#endif /* CONFIG_TRACE */ +#endif /* CONFIG_DEBUG_MCOUNT */ + +#ifndef CONFIG_DEBUG_MCOUNT +#define SUSPEND_MCOUNT +#define RESUME_MCOUNT +#define SUSPEND_MCOUNT_PROC(x) +#define RESUME_MCOUNT_PROC(x) +#endif /* CONFIG_DEBUG_MCOUNT */ + +#ifndef CONFIG_TRACE +#define SUSPEND_MCOUNT_TRACE +#define RESUME_MCOUNT_TRACE +#define LOCK_MCOUNT_TRACE(x) +#define UNLOCK_MCOUNT_TRACE(x) +#endif /* CONFIG_TRACE */ + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_PROFILER_H */ Index: linux/include/linux/reboot.h diff -u linux/include/linux/reboot.h:1.1.1.1 linux/include/linux/reboot.h:1.1.3.1 --- linux/include/linux/reboot.h:1.1.1.1 Mon Jan 18 02:27:11 1999 +++ linux/include/linux/reboot.h Sun Jan 24 20:12:34 1999 @@ -20,6 +20,9 @@ * CAD_OFF Ctrl-Alt-Del sequence sends SIGINT to init task. * POWER_OFF Stop OS and remove all power from system, if possible. * RESTART2 Restart system using given command string. + * OOPS Cause a kernel Oops, the machine should continue afterwards. + * STACKFAULT Overflow the kernel stack with recursion. + * KERNEL_LOOP Endless kernel loop, unlocked. 
*/ #define LINUX_REBOOT_CMD_RESTART 0x01234567 @@ -28,7 +31,9 @@ #define LINUX_REBOOT_CMD_CAD_OFF 0x00000000 #define LINUX_REBOOT_CMD_POWER_OFF 0x4321FEDC #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4 - +#define LINUX_REBOOT_CMD_OOPS 0x4F6F7001 +#define LINUX_REBOOT_CMD_STACKFAULT 0x53746602 +#define LINUX_REBOOT_CMD_KERNEL_LOOP 0x4C6F7003 #ifdef __KERNEL__ Index: linux/include/linux/sched.h diff -u linux/include/linux/sched.h:1.1.1.6 linux/include/linux/sched.h:1.1.3.4 --- linux/include/linux/sched.h:1.1.1.6 Mon Mar 29 23:40:47 1999 +++ linux/include/linux/sched.h Tue Mar 30 20:11:15 1999 @@ -5,6 +5,7 @@ extern unsigned long event; +#include #include #include #include @@ -312,6 +313,9 @@ struct signal_queue *sigqueue, **sigqueue_tail; unsigned long sas_ss_sp; size_t sas_ss_size; +#ifdef CONFIG_DEBUG_SOFTLOCKUP + unsigned int deadlock_count; +#endif }; /* @@ -329,6 +333,9 @@ #define PF_SIGNALED 0x00000400 /* killed by a signal */ #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_VFORK 0x00001000 /* Wake up parent in mm_release */ +#ifdef CONFIG_DEBUG_MCOUNT +#define PF_NO_MCOUNT 0x00002000 /* skip mcount() processing */ +#endif #define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */ #define PF_DTRACE 0x00200000 /* delayed trace (used on m68k, i386) */ Index: linux/include/linux/skbuff.h diff -u linux/include/linux/skbuff.h:1.1.1.1 linux/include/linux/skbuff.h:1.1.3.1 --- linux/include/linux/skbuff.h:1.1.1.1 Mon Jan 18 02:27:09 1999 +++ linux/include/linux/skbuff.h Sun Jan 24 20:12:36 1999 @@ -143,12 +143,30 @@ extern void skb_unlink(struct sk_buff *buf); extern __u32 skb_queue_len(struct sk_buff_head *list); extern struct sk_buff * skb_peek_copy(struct sk_buff_head *list); + +#ifndef CONFIG_MEMLEAK extern struct sk_buff * alloc_skb(unsigned int size, int priority); extern struct sk_buff * dev_alloc_skb(unsigned int size); extern void kfree_skbmem(struct sk_buff *skb); extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); 
extern struct sk_buff * skb_copy(struct sk_buff *skb, int priority); extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, int newheadroom); +#else /* CONFIG_MEMLEAK */ + +#include + +extern struct sk_buff * alloc_skb_wrap(unsigned int size, int priority, struct alloc_struct * IDPTR); +extern void kfree_skbmem(struct sk_buff *skb); /* no wrap for this */ +extern struct sk_buff * skb_clone_wrap(struct sk_buff *skb, int priority, struct alloc_struct * IDPTR); +extern struct sk_buff * skb_copy_wrap(struct sk_buff *skb, int priority, struct alloc_struct * IDPTR); +extern struct sk_buff * skb_realloc_headroom_wrap(struct sk_buff *skb, int newheadroom,struct alloc_struct * IDPTR); + +#define alloc_skb(size,priority) MEMLEAK_WRAP(alloc_skb,(size),(priority)) +#define skb_clone(skb,priority) MEMLEAK_WRAP(skb_clone,(skb),(priority)) +#define skb_copy(skb,priority) MEMLEAK_WRAP(skb_copy,(skb),(priority)) +#define skb_realloc_headroom(skb,newheadroom) MEMLEAK_WRAP(skb_realloc_headroom,(skb),(newheadroom)) +#endif /* CONFIG_MEMLEAK */ + #define dev_kfree_skb(a) kfree_skb(a) extern unsigned char * skb_put(struct sk_buff *skb, unsigned int len); extern unsigned char * skb_push(struct sk_buff *skb, unsigned int len); @@ -533,6 +551,7 @@ kfree_skb(skb); } +#ifndef CONFIG_MEMLEAK extern __inline__ struct sk_buff *dev_alloc_skb(unsigned int length) { struct sk_buff *skb; @@ -542,6 +561,16 @@ skb_reserve(skb,16); return skb; } +#else +#define dev_alloc_skb(length) \ +({ \ + struct sk_buff *_skb; \ + _skb = alloc_skb((unsigned int)(length)+16, GFP_ATOMIC); \ + if (_skb) \ + skb_reserve(_skb,16); \ + _skb; \ +}) +#endif /* CONFIG_MEMLEAK */ extern __inline__ struct sk_buff * skb_cow(struct sk_buff *skb, unsigned int headroom) Index: linux/include/linux/slab.h diff -u linux/include/linux/slab.h:1.1.1.1 linux/include/linux/slab.h:1.1.3.1 --- linux/include/linux/slab.h:1.1.1.1 Mon Jan 18 02:27:11 1999 +++ linux/include/linux/slab.h Sun Jan 24 20:12:36 1999 @@ -9,6 +9,8 @@ #if 
defined(__KERNEL__) +#include + typedef struct kmem_cache_s kmem_cache_t; #include @@ -48,6 +50,7 @@ extern long kmem_cache_init(long, long); extern void kmem_cache_sizes_init(void); extern kmem_cache_t *kmem_find_general_cachep(size_t); +#ifndef CONFIG_MEMLEAK extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long, void (*)(void *, kmem_cache_t *, unsigned long), void (*)(void *, kmem_cache_t *, unsigned long)); @@ -58,6 +61,33 @@ extern void *kmalloc(size_t, int); extern void kfree(const void *); extern void kfree_s(const void *, size_t); +#else +extern kmem_cache_t *kmem_cache_create_wrap(const char *, size_t, size_t, unsigned long, + void (*)(void *, kmem_cache_t *, unsigned long), + void (*)(void *, kmem_cache_t *, unsigned long), struct alloc_struct *); +extern int kmem_cache_shrink(kmem_cache_t *); /* no wrap for this */ +extern void *kmem_cache_alloc_wrap(kmem_cache_t *, int, struct alloc_struct *); +extern void kmem_cache_free(kmem_cache_t *, void *); /* no wrap for this */ + +extern void *kmalloc_wrap(unsigned int size, int priority, struct alloc_struct *); +extern void kfree(const void *); +extern void kfree_s(const void *, size_t); + +#define kmem_cache_create(name,size,offset,flags,ctor,dtor) \ + MEMLEAK_WRAP(kmem_cache_create,name,size,offset,flags,ctor,dtor) + +#ifndef MEMLEAK_KILL_ALLOCATION +#define kmem_cache_alloc(cachep,flags) \ + MEMLEAK_WRAP(kmem_cache_alloc,cachep,flags) +#else +#define kmem_cache_alloc(cachep,flags) \ + kmem_cache_alloc_wrap((cachep),(flags),NULL) +#endif + +#define kmalloc(size,priority) \ + MEMLEAK_WRAP(kmalloc,size,priority) + +#endif /* CONFIG_MEMLEAK */ extern void kmem_cache_reap(int); extern int get_slabinfo(char *); Index: linux/include/linux/sysctl.h diff -u linux/include/linux/sysctl.h:1.1.1.2 linux/include/linux/sysctl.h:1.1.3.2 --- linux/include/linux/sysctl.h:1.1.1.2 Wed Mar 24 01:52:25 1999 +++ linux/include/linux/sysctl.h Thu Mar 25 02:27:25 1999 @@ -406,6 +406,10 @@ }; /* CTL_DEBUG 
names: */ +enum { + DEBUG_KSTACK_METER = 1, + DEBUG_DISABLE_MCOUNT = 2, +}; /* CTL_DEV names: */ enum { Index: linux/include/linux/vmalloc.h diff -u linux/include/linux/vmalloc.h:1.1.1.1 linux/include/linux/vmalloc.h:1.1.3.1 --- linux/include/linux/vmalloc.h:1.1.1.1 Mon Jan 18 02:27:11 1999 +++ linux/include/linux/vmalloc.h Sun Jan 24 20:12:42 1999 @@ -13,12 +13,22 @@ struct vm_struct * next; }; -struct vm_struct * get_vm_area(unsigned long size); void vfree(void * addr); -void * vmalloc(unsigned long size); long vread(char *buf, char *addr, unsigned long count); void vmfree_area_pages(unsigned long address, unsigned long size); +#ifndef CONFIG_MEMLEAK +void * vmalloc(unsigned long size); +struct vm_struct * get_vm_area(unsigned long size); int vmalloc_area_pages(unsigned long address, unsigned long size); +#else /* CONFIG_MEMLEAK */ +extern void * vmalloc_wrap(unsigned long size, struct alloc_struct *id); +extern struct vm_struct * get_vm_area_wrap(unsigned long size, struct alloc_struct *id); +extern int vmalloc_area_pages_wrap(unsigned long address, unsigned long size, struct alloc_struct *id); + +#define vmalloc(size) MEMLEAK_WRAP(vmalloc,size) +#define get_vm_area(size) MEMLEAK_WRAP(get_vm_area,size) +#define vmalloc_area_pages(address, size) MEMLEAK_WRAP(vmalloc_area_pages,address,size) +#endif /* CONFIG_MEMLEAK */ #endif Index: linux/include/net/sock.h diff -u linux/include/net/sock.h:1.1.1.7 linux/include/net/sock.h:1.1.3.6 --- linux/include/net/sock.h:1.1.1.7 Sat Apr 17 16:27:21 1999 +++ linux/include/net/sock.h Sat Apr 24 02:48:09 1999 @@ -706,16 +706,38 @@ return a; } -extern struct sock * sk_alloc(int family, int priority, int zero_it); extern void sk_free(struct sock *sk); extern void destroy_sock(struct sock *sk); +#ifndef CONFIG_MEMLEAK extern struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority); extern struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority); +extern void 
*sock_kmalloc(struct sock *sk, int size, int priority); +extern struct sock * sk_alloc(int family, int priority, int zero_it); +#else +extern struct sk_buff *sock_wmalloc_wrap(struct sock *sk, + unsigned long size, int force, + int priority, struct alloc_struct *IDPTR); +extern struct sk_buff *sock_rmalloc_wrap(struct sock *sk, + unsigned long size, int force, + int priority, struct alloc_struct *IDPTR); +extern void *sock_kmalloc_wrap(struct sock *sk, int size, int priority, struct alloc_struct *IDPTR); +extern struct sock * sk_alloc_wrap(int family, int priority, int zero_it, struct alloc_struct *IDPTR); + +#define sock_wmalloc(sk,size,force,priority) \ + MEMLEAK_WRAP(sock_wmalloc,(sk),(size),(force),(priority)) +#define sock_rmalloc(sk,size,force,priority) \ + MEMLEAK_WRAP(sock_rmalloc,(sk),(size),(force),(priority)) +#define sock_kmalloc(sk,size,priority) \ + MEMLEAK_WRAP(sock_kmalloc,(sk),(size),(priority)) +#define sk_alloc(family,priority,zero_it) \ + MEMLEAK_WRAP(sk_alloc,(family),(priority),(zero_it)) + +#endif /* CONFIG_MEMLEAK */ extern void sock_wfree(struct sk_buff *skb); extern void sock_rfree(struct sk_buff *skb); extern unsigned long sock_rspace(struct sock *sk); @@ -728,12 +750,22 @@ extern int sock_getsockopt(struct socket *sock, int level, int op, char *optval, int *optlen); +#ifndef CONFIG_MEMLEAK extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, unsigned long fallback, int noblock, int *errcode); -extern void *sock_kmalloc(struct sock *sk, int size, int priority); +#else +extern struct sk_buff *sock_alloc_send_skb_wrap(struct sock *sk, + unsigned long size, + unsigned long fallback, + int noblock, + int *errcode, + struct alloc_struct *IDPTR); +#define sock_alloc_send_skb(sk,size,fallback,noblock,errcode) \ + MEMLEAK_WRAP(sock_alloc_send_skb,sk,size,fallback,noblock,errcode) +#endif extern void sock_kfree_s(struct sock *sk, void *mem, int size); Index: linux/init/main.c diff -u linux/init/main.c:1.1.1.5 
linux/init/main.c:1.1.3.4 --- linux/init/main.c:1.1.1.5 Sat Apr 17 16:27:27 1999 +++ linux/init/main.c Sat Apr 24 02:48:09 1999 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -339,6 +340,14 @@ extern void md_setup(char *str,int *ints) __init; #endif +#ifdef __sparc__ +extern int serial_console; +#endif + +#ifdef CONFIG_DEBUG_MCOUNT +extern void mcount_init(void); +#endif + /* * Boot command-line arguments */ @@ -1100,6 +1109,32 @@ extern void initialize_secondary(void); +#if defined(__SMP__) && defined(CONFIG_KERNEL_DEBUGGING) +void show_one (int i) +{ + static int curr=0x12345678; + + curr++; + *(((volatile int *)0x000b8000)+i)=curr; + *(((volatile int *)0x000b8100)+i)=curr; + *(((volatile int *)0x000b8200)+i)=curr; + *(((volatile int *)0x000b8300)+i)=curr; +} + +void show_us(void) +{ + for (;;) { + __cli(); + show_one(0); + show_one(10); + show_one(20); + show_one(30); + show_one(40); + show_one(50); + } +} +#endif + /* * Activate the first processor. */ @@ -1123,12 +1158,28 @@ printk(linux_banner); setup_arch(&command_line, &memory_start, &memory_end); memory_start = paging_init(memory_start,memory_end); +#ifdef CONFIG_MEMLEAK + /* + * memleak_init must run before other xx_init() will start + * eating ram. + */ + memory_end = memleak_init(memory_start,memory_end); +#endif trap_init(); init_IRQ(); sched_init(); time_init(); parse_options(command_line); + if (prof_shift) { + prof_buffer = (unsigned int *) memory_start; + /* only text is profiled */ + prof_len = (unsigned long) &_etext - (unsigned long) &_stext; + prof_len >>= prof_shift; + memory_start += prof_len * sizeof(unsigned int); + memset(prof_buffer, 0, prof_len * sizeof(unsigned int)); + } + /* * HACK ALERT! This is early. 
We're enabling the console before * we've done PCI setups etc, and console_init() must be aware of @@ -1138,14 +1189,6 @@ #ifdef CONFIG_MODULES init_modules(); #endif - if (prof_shift) { - prof_buffer = (unsigned int *) memory_start; - /* only text is profiled */ - prof_len = (unsigned long) &_etext - (unsigned long) &_stext; - prof_len >>= prof_shift; - memory_start += prof_len * sizeof(unsigned int); - memset(prof_buffer, 0, prof_len * sizeof(unsigned int)); - } memory_start = kmem_cache_init(memory_start, memory_end); sti(); @@ -1178,6 +1221,10 @@ #endif check_bugs(); printk("POSIX conformance testing by UNIFIX\n"); + +#ifdef CONFIG_DEBUG_MCOUNT + mcount_init(); +#endif /* * We count on the initial thread going ok Index: linux/kernel/Makefile diff -u linux/kernel/Makefile:1.1.1.1 linux/kernel/Makefile:1.1.3.1 --- linux/kernel/Makefile:1.1.1.1 Mon Jan 18 02:26:59 1999 +++ linux/kernel/Makefile Sun Jan 24 20:13:06 1999 @@ -10,6 +10,10 @@ .S.s: $(CPP) -traditional $< -o $*.s +SUB_DIRS := debug +MOD_SUB_DIRS := debug +ALL_SUB_DIRS := debug + O_TARGET := kernel.o O_OBJS = sched.o dma.o fork.o exec_domain.o panic.o printk.o sys.o \ module.o exit.o itimer.o info.o time.o softirq.o resource.o \ Index: linux/kernel/fork.c diff -u linux/kernel/fork.c:1.1.1.7 linux/kernel/fork.c:1.1.3.5 --- linux/kernel/fork.c:1.1.1.7 Sat Apr 17 16:27:28 1999 +++ linux/kernel/fork.c Sat Apr 24 02:48:09 1999 @@ -11,12 +11,14 @@ * management can be a bitch. See 'mm/mm.c': 'copy_page_tables()' */ +#include #include #include #include #include #include #include +#include #include #include @@ -572,9 +574,14 @@ p->did_exec = 0; p->swappable = 0; +#ifdef CONFIG_DEBUG_SOFTLOCKUP +#warning Do something about KSTACK here ... 
+ p->deadlock_count=CONFIG_SOFTLOCKUP_THRESHOLD; +#endif p->state = TASK_UNINTERRUPTIBLE; copy_flags(clone_flags, p); + RESUME_MCOUNT_PROC(p); p->pid = get_pid(clone_flags); /* Index: linux/kernel/ksyms.c diff -u linux/kernel/ksyms.c:1.1.1.7 linux/kernel/ksyms.c:1.1.3.3 --- linux/kernel/ksyms.c:1.1.1.7 Wed Mar 24 01:53:16 1999 +++ linux/kernel/ksyms.c Thu Mar 25 02:27:25 1999 @@ -87,18 +87,29 @@ EXPORT_SYMBOL(exit_sighand); /* internal kernel memory management */ +#ifndef CONFIG_MEMLEAK EXPORT_SYMBOL(__get_free_pages); +EXPORT_SYMBOL(kmem_cache_create); +EXPORT_SYMBOL(kmem_cache_alloc); +EXPORT_SYMBOL(kmalloc); +EXPORT_SYMBOL(vmalloc); +#else +EXPORT_SYMBOL(__get_free_pages_wrap); +EXPORT_SYMBOL(kmem_cache_create_wrap); +EXPORT_SYMBOL(kmem_cache_alloc_wrap); +EXPORT_SYMBOL(kmalloc_wrap); +EXPORT_SYMBOL(vmalloc_wrap); +EXPORT_SYMBOL(alloc_addr_lock); +EXPORT_SYMBOL(alloc_addr_nolock); +EXPORT_SYMBOL(free_addr); +#endif /* CONFIG_MEMLEAK */ EXPORT_SYMBOL(free_pages); EXPORT_SYMBOL(__free_page); EXPORT_SYMBOL(kmem_find_general_cachep); -EXPORT_SYMBOL(kmem_cache_create); EXPORT_SYMBOL(kmem_cache_shrink); -EXPORT_SYMBOL(kmem_cache_alloc); EXPORT_SYMBOL(kmem_cache_free); -EXPORT_SYMBOL(kmalloc); EXPORT_SYMBOL(kfree); EXPORT_SYMBOL(kfree_s); -EXPORT_SYMBOL(vmalloc); EXPORT_SYMBOL(vfree); EXPORT_SYMBOL(mem_map); EXPORT_SYMBOL(remap_page_range); @@ -329,6 +340,10 @@ EXPORT_SYMBOL(system_utsname); /* UTS data */ EXPORT_SYMBOL(uts_sem); /* UTS semaphore */ EXPORT_SYMBOL(sys_call_table); +#ifdef CONFIG_XKDEBUG +EXPORT_SYMBOL(kernel_int1_hook); +EXPORT_SYMBOL(kernel_int3_hook); +#endif /* CONFIG_XKDEBUG */ EXPORT_SYMBOL(machine_restart); EXPORT_SYMBOL(machine_halt); EXPORT_SYMBOL(machine_power_off); Index: linux/kernel/panic.c diff -u linux/kernel/panic.c:1.1.1.1 linux/kernel/panic.c:1.1.3.1 --- linux/kernel/panic.c:1.1.1.1 Mon Jan 18 02:26:59 1999 +++ linux/kernel/panic.c Sun Jan 24 20:13:07 1999 @@ -14,6 +14,9 @@ #include #include #include +#ifdef CONFIG_TRACE +#include 
+#endif #ifdef __alpha__ #include @@ -36,6 +39,9 @@ static char buf[1024]; va_list args; +#ifdef CONFIG_TRACE + SUSPEND_MCOUNT_TRACE; +#endif va_start(args, fmt); vsprintf(buf, fmt, args); va_end(args); Index: linux/kernel/printk.c diff -u linux/kernel/printk.c:1.1.1.2 linux/kernel/printk.c:1.1.3.2 --- linux/kernel/printk.c:1.1.1.2 Tue Mar 9 01:55:51 1999 +++ linux/kernel/printk.c Fri Mar 19 01:23:28 1999 @@ -10,6 +10,8 @@ * elsewhere, in preparation for a serial line console (someday). * Ted Ts'o, 2/11/93. * Modified for sysctl support, 1/8/97, Chris Horn. + * syslog_to_console for SysRQ dumploGs. 12/04/1998. + * Keith Owens */ #include @@ -214,6 +216,46 @@ out: unlock_kernel(); return error; +} + +void syslog_to_console(void) +{ + /* + * Copy the syslog buffer to all registered consoles. Like + * sys_syslog, option 3 but to console instead of user. Raw data, + * no attempt to find record headers, message levels etc. + * Intended as a last ditch dump of syslog. + */ + unsigned long i, j, count, flags; + char *p, buf[129]; /* copy log in 128 byte chunks */ + + /* + * The logged_chars, log_start, and log_size values may + * change from an interrupt, so we disable interrupts. + */ + __save_flags(flags); + __cli(); + count = LOG_BUF_LEN; + if (count > logged_chars) + count = logged_chars; + j = log_start + log_size - count; + __restore_flags(flags); + /* Race here, the log can change under us, we might dump garbage. + * Live with it, this is a last ditch output, waiting for locks + * could stop output. console_print should not require locks. 
+ */ + for (i = 0, p = buf; i < count; i++) { + *p++ = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1))); + if (p == buf+sizeof(buf)-1) { + *p = '\0'; + console_print(buf); + p = buf; + } + } + if (p != buf) { + *p = '\0'; + console_print(buf); + } } asmlinkage int sys_syslog(int type, char * buf, int len) Index: linux/kernel/sched.c diff -u linux/kernel/sched.c:1.1.1.8 linux/kernel/sched.c:1.1.3.4 --- linux/kernel/sched.c:1.1.1.8 Wed Mar 24 01:53:17 1999 +++ linux/kernel/sched.c Thu Mar 25 02:27:25 1999 @@ -24,6 +24,11 @@ * current-task */ +/* + * Semaphore deadlock detector. Copyright (C) 1999 Andrea Arcangeli + */ + +#include #include #include #include @@ -39,6 +44,7 @@ #include #include +#include /* * kernel variables @@ -773,6 +779,10 @@ #endif kstat.context_swtch++; get_mmu_context(next); + RESUME_MCOUNT_PROC(prev); +#ifdef CONFIG_DEBUG_SOFTLOCKUP + prev->deadlock_count=CONFIG_SOFTLOCKUP_THRESHOLD; +#endif switch_to(prev,next); __schedule_tail(); @@ -902,14 +912,37 @@ tsk->state = TASK_RUNNING; \ remove_wait_queue(&sem->wait, &wait); +#ifdef CONFIG_SEMAPHORE_DEADLOCK +static void generate_oops (struct semaphore *sem) +{ + sema_init(sem, 9876); + wake_up(&sem->wait); +} +#endif + void __down(struct semaphore * sem) { DOWN_VAR +#ifdef CONFIG_SEMAPHORE_DEADLOCK + struct timer_list timer; + init_timer (&timer); + timer.expires = jiffies + HZ*20; + timer.data = (unsigned long) sem; + timer.function = (void (*)(unsigned long)) generate_oops; + add_timer(&timer); +#endif DOWN_HEAD(TASK_UNINTERRUPTIBLE) if (waking_non_zero(sem)) break; schedule(); +#ifdef CONFIG_SEMAPHORE_DEADLOCK + if (atomic_read(&sem->count) == 9876) + *(int *) 0 = 0; +#endif DOWN_TAIL(TASK_UNINTERRUPTIBLE) +#ifdef CONFIG_SEMAPHORE_DEADLOCK + del_timer(&timer); +#endif } int __down_interruptible(struct semaphore * sem) Index: linux/kernel/softirq.c diff -u linux/kernel/softirq.c:1.1.1.2 linux/kernel/softirq.c:1.1.3.2 --- linux/kernel/softirq.c:1.1.1.2 Wed Mar 24 01:53:18 1999 +++ linux/kernel/softirq.c 
Thu Mar 25 02:27:25 1999 @@ -11,10 +11,12 @@ * due bh_mask_count not atomic handling. Copyright (C) 1998 Andrea Arcangeli */ +#include #include #include #include #include +#include #include @@ -66,5 +68,8 @@ hardirq_endlock(cpu); } softirq_endlock(cpu); +#if defined(CONFIG_DEBUG_SOFTLOCKUP) && (defined(__SMP__) || defined(CONFIG_SMP)) + mcount(); +#endif } } Index: linux/kernel/sys.c diff -u linux/kernel/sys.c:1.1.1.1 linux/kernel/sys.c:1.1.3.1 --- linux/kernel/sys.c:1.1.1.1 Mon Jan 18 02:27:00 1999 +++ linux/kernel/sys.c Sun Jan 24 20:13:08 1999 @@ -144,7 +144,38 @@ return max_prio; } +/* routines to trip various softlockup conditions, driven from reboot */ +static void kstack_test1 (void); +static void kstack_test2 (void); +static void kstack_test3 (void); +static void kstack_test4 (void); +static void kstack_test1 (void) +{ + kstack_test2(); +} + +static void kstack_test2 (void) +{ + kstack_test3(); +} + +static void kstack_test3 (void) +{ + kstack_test4(); +} + +static void kstack_test4 (void) +{ + kstack_test1(); /* curse and recurse, stack overflow */ +} + +static volatile int softlockup_count=0; +void softlockup_looptest(void) +{ + softlockup_count++; +} + /* * Reboot system call: for obvious reasons only root may call it, * and even root needs to set up some magic numbers in the registers @@ -207,6 +238,34 @@ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer); printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer); machine_restart(buffer); + break; + + case LINUX_REBOOT_CMD_OOPS: + /* Kernel oops, the machine should recover afterwards */ + *(char *)0=0; + break; + + /* Trip various software lockup conditions. Overloading sys_reboot + * because they do not justify their own syscall. These do not notify + * the reboot list. 
+ */ + + case LINUX_REBOOT_CMD_STACKFAULT: + /* stack fault via endless recursion */ +#ifndef CONFIG_DEBUG_KSTACK + printk(KERN_WARNING "Invoking STACKFAULT without CONFIG_DEBUG_KSTACK\n" + "Machine may not recover!\n"); +#endif + kstack_test1(); + break; + + case LINUX_REBOOT_CMD_KERNEL_LOOP: + /* lockup via endless loop */ +#ifndef CONFIG_DEBUG_SOFTLOCKUP + printk(KERN_WARNING "Invoking KERNEL_LOOP without CONFIG_DEBUG_SOFTLOCKUP\n" + "Machine may not recover!\n"); +#endif + for (;;) softlockup_looptest(); break; default: Index: linux/kernel/sysctl.c diff -u linux/kernel/sysctl.c:1.1.1.2 linux/kernel/sysctl.c:1.1.3.2 --- linux/kernel/sysctl.c:1.1.1.2 Tue Feb 16 18:01:52 1999 +++ linux/kernel/sysctl.c Tue Feb 23 22:52:51 1999 @@ -264,7 +264,22 @@ {0} }; +#ifdef CONFIG_DEBUG_MCOUNT +extern int sysctl_disable_mcount; +#ifdef CONFIG_KSTACK_METER +extern int kstack_meter[]; +#endif +#endif + static ctl_table debug_table[] = { +#ifdef CONFIG_DEBUG_MCOUNT + {DEBUG_DISABLE_MCOUNT, "disable_mcount", &sysctl_disable_mcount, + sizeof(int), 0644, NULL, &proc_dointvec}, +#ifdef CONFIG_KSTACK_METER + {DEBUG_KSTACK_METER, "kstack_meter", &kstack_meter, 2*sizeof(int), + 0644, NULL, &proc_dointvec}, +#endif +#endif {0} }; Index: linux/kernel/debug/Config.in diff -u /dev/null linux/kernel/debug/Config.in:1.1.3.2 --- /dev/null Sat Apr 24 02:50:30 1999 +++ linux/kernel/debug/Config.in Mon Jan 25 14:41:27 1999 @@ -0,0 +1,39 @@ +# +# Common kernel debugging configuration. arch specific debugging facilities +# are in arch/xxx/config.in. 
+# + bool 'Kernel debugging support' CONFIG_KERNEL_DEBUGGING n + if [ "$CONFIG_KERNEL_DEBUGGING" = "y" ]; then + bool ' Semphore deadlock detector' CONFIG_SEMAPHORE_DEADLOCK n + bool ' Debug kernel stack overflows' CONFIG_DEBUG_KSTACK n + if [ "$CONFIG_DEBUG_KSTACK" = "y" ]; then + int ' Stack threshold' CONFIG_KSTACK_THRESHOLD 500 + fi + bool ' Kernel Stack Meter' CONFIG_KSTACK_METER n + bool ' Detect software lockups' CONFIG_DEBUG_SOFTLOCKUP n + if [ "$CONFIG_DEBUG_SOFTLOCKUP" = "y" ]; then + int ' Deadlock threshold' CONFIG_SOFTLOCKUP_THRESHOLD 100000000 0 2147483647 + fi + bool ' GCC profiling support' CONFIG_PROFILE_GCC n + bool ' Enable kernel tracer' CONFIG_TRACE n + if [ "$CONFIG_TRACE" = "y" ]; then + int ' Trace ringbuffer size' CONFIG_TRACE_SIZE 16384 + bool ' Trace timestamps' CONFIG_TRACE_TIMESTAMP n + if [ "$CONFIG_TRACE_TIMESTAMP" = "y" ]; then + bool ' Truncate timestamp' CONFIG_TRACE_TRUNCTIME n + fi + bool ' Process ID' CONFIG_TRACE_PID n + bool ' Cpu ID' CONFIG_TRACE_CPU n + fi + # CONFIG_DEBUG_MCOUNT is "y" iff an option requires calls to mcount(). + if [ "$CONFIG_DEBUG_KSTACK" = "y" -o \ + "$CONFIG_DEBUG_SOFTLOCKUP" = "y" -o \ + "$CONFIG_KSTACK_METER" = "y" -o \ + "$CONFIG_TRACE" = "y" -o \ + "$CONFIG_PRINT_EIP" = "y" -o \ + "$CONFIG_PROFILE_GCC" = "y" ]; then + define_bool CONFIG_DEBUG_MCOUNT y + else + define_bool CONFIG_DEBUG_MCOUNT n + fi + fi Index: linux/kernel/debug/Makefile diff -u /dev/null linux/kernel/debug/Makefile:1.1.3.1 --- /dev/null Sat Apr 24 02:50:30 1999 +++ linux/kernel/debug/Makefile Sun Jan 24 20:13:08 1999 @@ -0,0 +1,17 @@ +# +# Makefile for the linux kernel. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... 
+ +ifeq ($(CONFIG_KERNEL_DEBUGGING),y) + O_TARGET := debug.o + OX_OBJS = profiler.o + # Must turn off profiling for the profiler. + override CFLAGS := $(CFLAGS:%-pg=%-g -c) +endif + +include $(TOPDIR)/Rules.make Index: linux/kernel/debug/profiler.c diff -u /dev/null linux/kernel/debug/profiler.c:1.1.3.1 --- /dev/null Sat Apr 24 02:50:30 1999 +++ linux/kernel/debug/profiler.c Sun Jan 24 20:13:08 1999 @@ -0,0 +1,411 @@ +/* + * linux/kernel/profiler.c + * + * Copyright (C) 1997 Ingo Molnar, Richard Henderson + * Copyright (C) 1998 Andrea Arcangeli + * + * This source is covered by the GNU GPL, the same as all kernel sources. + */ + +/* + * 'profiler.c' implements various profiling hacks, by abusing the profiling + * hook 'mcount', generated by GCC -pg + * + * Currently used for: + * + * - monitoring kernel stack usage and generating oopses when stack overflow + * - detecting software lockups + * - tracing the kernel + * + * Has to be a separate C module, because we have to compile it without -pg, + * to avoid recursion. + */ + +/* + * - print-eip is now a config option and it' s improved to give as the + * the execution order of the box and fixed some glitches. + * - developed CONFIG_PROFILE_GCC + * - developed CONFIG_KSTACK_METER + * - fixed get_stack_left() to handle the 8k 2.1.x kernel stack size. + * -arca + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * Generally we dislike #ifdef's in main modules, but these mcount() based + * features are is too performance-sensitive to make them an all or nothing + * option, and too small to be put into header files. + */ + +#ifdef CONFIG_DEBUG_MCOUNT /* any mcount() functions activated? 
*/ + +#ifdef CONFIG_TRACE + +spinlock_t trace_table_lock = SPIN_LOCK_UNLOCKED; +struct trace_table *trace_table = NULL; + +#endif /* CONFIG_TRACE */ + +#ifdef CONFIG_KSTACK_METER +struct { + unsigned int min_left_stack; + profiler_pc_t stack_eater_eip; +} kstack_meter = {-1UL, 0,}; + +static spinlock_t stack_meter_lock = SPIN_LOCK_UNLOCKED; +#endif + +/* deal with too early calls to mcount() and recursion */ +atomic_t mcount_ready = ATOMIC_INIT(0); +int sysctl_disable_mcount = 0; +#ifdef CONFIG_TRACE +atomic_t mcount_trace_ready = ATOMIC_INIT(0); +#endif + +void mcount_init (void) +{ +#ifdef CONFIG_TRACE + if ((trace_table = vmalloc(sizeof(*trace_table))) == NULL) { + printk("mcount_init: cannot vmalloc trace_table, size %lu. No tracing possible.\n", (unsigned long) sizeof(*trace_table)); + } + else { + trace_table->table_size = CONFIG_TRACE_SIZE; + trace_table->curr_call = 0; + memset(trace_table->entries, 0, sizeof(trace_table->entries)); + spin_lock_init(&trace_table_lock); +#ifdef CONFIG_TRACE_TIMESTAMP +#ifdef __i386__ + if (!(boot_cpu_data.x86_capability & 0x10)) + printk("mcount_init: cpu does not support rdtsc, timestamps are jiffies instead\n"); +#else + printk("mcount_init: not i386 cpu, timestamps are jiffies instead\n"); +#endif /* __i386__ */ +#endif /* CONFIG_TRACE_TIMESTAMP */ + RESUME_MCOUNT_TRACE; /* start it */ + } +#endif /* CONFIG_TRACE */ + + printk("mcount_init\n"); + /* + * Ok, from now on it's for real: + */ + RESUME_MCOUNT; /* start it */ +} + +#ifdef CONFIG_TRACE + +/* Strictly speaking this routine should get the trace_table spin lock. + * However it is rarely used and may not be in a safe context to get the + * lock so we just dump the table and hope it does not change under us. + */ + +void print_emergency_trace (void) +{ + struct trace_entry *t; + int i, j; + + SUSPEND_MCOUNT_TRACE; + printk ("[] "); + +/* + * Well, 30 entries is pretty arbitrary, seems to be a reasonable value. 
+ */ + j = trace_table->curr_call-30; + for (i=0; i<30; i++) { + j %= CONFIG_TRACE_SIZE; /* wraparound */ + t = &(trace_table->entries[j++]); + /* ksymoops expects [] */ + printk ("[<%08lx>] ", t->pc); +#ifdef CONFIG_TRACE_PID + printk("%d ", t->pid); +#endif +#if defined(CONFIG_TRACE_CPU) && defined(__SMP__) + printk("%d ", t->cpu); +#endif + } + RESUME_MCOUNT_TRACE; +} +#endif /* CONFIG_TRACE */ + +#ifdef __i386__ +/* + * this (64 bytes) is twice as big as cachelines, but we cannot + * guarantee cacheline alignment ... too bad. So we waste two + * cachelines in the bad case. + * + * cacheline alignment is absolutely vital in this case, as these + * variables are higher frequented than say .. "current", and they + * should stay local on the owner CPU under all circumstances. + */ +struct cacheline_t { unsigned int i; int __dummy[15]; }; + +#ifdef CONFIG_PRINT_EIP +/* + * Use this as last resort, when nothing else helps. If a hard lockup + * happens then you can decode the last EIP from the binary coded + * form on the screen. + */ + +static __inline__ void print_eip(unsigned int eip) +{ +#define video ((short int *)(0x000b8000 + __PAGE_OFFSET)) +#define HISTORY 24 +#define ALIGN __attribute__((aligned(4))) + + int i, value; + unsigned int tmp; + + /* + * We split the codepath in a dumb way, to get speed and proper + * per-CPU execution. + */ +#ifdef __SMP__ + if (!smp_processor_id()) + { +#endif + static struct cacheline_t curr_pos_0 ALIGN ={0,}; + static unsigned int count_0 = 0; + /* + * we cover 1M of code currently ... should be enuff + */ + if ((curr_pos_0.i += 80) == HISTORY*80) + curr_pos_0.i = 0; + + for (i=7; i>=0; i--) + { + /* + * mask off the hexa digits one by one. 
+ */ + value = eip & 0xf; + if (value<10) + *(video+i+curr_pos_0.i) = 0x5400 + (value+'0'); + else + *(video+i+curr_pos_0.i) = 0x5400 + (value-10+'a'); + eip >>= 4; + } + /* *(video+8+curr_pos_0.i) = 0x5400 + '=';*/ + tmp = count_0++; + for (i=3; i>=0; i--) + { + /* + * mask off the hexa digits one by one. + */ + value = tmp & 0xf; + if (value<10) + *(video+i+9+curr_pos_0.i) = 0x5400 + (value+'0'); + else + *(video+i+9+curr_pos_0.i) = 0x5400 + (value-10+'a'); + tmp >>= 4; + } +#ifdef __SMP__ + } else { + static struct cacheline_t curr_pos_1 ALIGN ={0,}; + static unsigned int count_1 = 0; + /* + * we cover 1M of code currently ... should be enuff + */ + + if ((curr_pos_1.i += 80) == HISTORY*80) + curr_pos_1.i = 0; + + for (i=7; i>=0; i--) { + /* + * mask off the hexa digits one by one. + */ + value = eip & 0xf; + if (value<10) + *(video+40+i+curr_pos_1.i) = 0x6400 + (value+'0'); + else + *(video+40+i+curr_pos_1.i) = 0x6400 + (value-10+'a'); + eip >>= 4; + } + /* *(video+48+curr_pos_1.i) = 0x6400 + '=';*/ + tmp = count_1++; + for (i=3; i>=0; i--) { + /* + * mask off the hexa digits one by one. + */ + value = tmp & 0xf; + if (value<10) + *(video+i+49+curr_pos_1.i) = 0x6400 + (value+'0'); + else + *(video+i+49+curr_pos_1.i) = 0x6400 + (value-10+'a'); + tmp >>= 4; + } + } +#endif /* __SMP__ */ + +#undef ALIGN +#undef HISTORY +#undef video +} + +#endif /* CONFIG_PRINT_EIP */ +#endif /* __i386__ */ + +#ifdef CONFIG_PROFILE_GCC /* arca */ +static __inline__ void kernel_profiling(profiler_pc_t eip) +{ + extern char _stext; + extern unsigned int * prof_buffer; + + if (!prof_buffer) + return; + + eip -= (unsigned long) &_stext; + eip >>= prof_shift; + /* + * Don't ignore out-of-bounds EIP values silently, + * put them into the last histogram slot, so if + * present, they will show up as a sharp peak. 
+ */ + if (eip > prof_len-1) + eip = prof_len-1; + + atomic_inc((atomic_t *)&prof_buffer[eip]); +} +#endif + +/* Watch this routine and mcount for any hidden calls to external + * routines. On SMP, something as simple as save_flags() calls + * __global_save_flags() in irq.c. If that module was compiled with + * -pg it calls back to mcount, stack overflow due to recursion. nm + * profiler.o should show no references to external procedures except + * for printk and vmalloc (from mcount_init). KAO. + */ + +inline int mcount_internal(profiler_pc_t self_addr) +{ +#ifdef CONFIG_PRINT_EIP + print_eip(self_addr); +#endif + +#ifdef CONFIG_PROFILE_GCC + kernel_profiling(self_addr); +#endif + +#ifdef CONFIG_DEBUG_SOFTLOCKUP + switch (current->deadlock_count) { + case 0: + if (current->pid) { + SUSPEND_MCOUNT; + printk("Deadlock threshold zero, should not happen, pid %d\n", current->pid); + RESUME_MCOUNT; + } + current->deadlock_count--; + return 0; + + case 1: + /* + * Oops on return. Do the oops outside this routine so + * mcount_ready and trace_table_lock are in a clean state. + */ + current->deadlock_count = 0; + /* no more mcount() processing for this process */ + SUSPEND_MCOUNT_PROC(current); + printk("Deadlock threshold exceeded, forcing Oops.\n"); + return 1; /* caller should oops */ + break; + + default: + current->deadlock_count--; + break; + } +#endif /* CONFIG_DEBUG_SOFTLOCKUP */ + +#ifdef CONFIG_DEBUG_KSTACK + if (get_stack_left() - sizeof(struct task_struct) /* accounted the tss -arca */ + < CONFIG_KSTACK_THRESHOLD) { + SUSPEND_MCOUNT_PROC(current); + printk(KERN_ALERT "kernel stack overflow. Forcing Oops.\n"); + return 1; + } +#endif /* CONFIG_DEBUG_KSTACK */ + +#ifdef CONFIG_KSTACK_METER /* arca */ + { + unsigned int left_stack, flags; + + /* + * One CPU per time to be sure that min_left_stack is really + * the minimum. 
-arca + */ + spin_lock_irqsave(&stack_meter_lock, flags); + left_stack = get_stack_left() - sizeof(struct task_struct); + if (left_stack < kstack_meter.min_left_stack) + { + kstack_meter.min_left_stack = left_stack; + kstack_meter.stack_eater_eip = self_addr; + } + spin_unlock_irqrestore(&stack_meter_lock, flags); + } +#endif + +#ifdef CONFIG_TRACE + { + /* Protected by trace_table_lock */ + struct trace_entry *t; + ++(trace_table->curr_call); + while (trace_table->curr_call >= CONFIG_TRACE_SIZE) { + trace_table->curr_call -= CONFIG_TRACE_SIZE; + } + + t = &(trace_table->entries[trace_table->curr_call]); + + t->pc = self_addr; +#ifdef CONFIG_TRACE_TIMESTAMP + t->timestamp = get_profiler_timestamp(); +#endif +#ifdef CONFIG_TRACE_PID + t->pid = current->pid; +#endif +#if defined(CONFIG_TRACE_CPU) && defined(__SMP__) + t->cpu = smp_processor_id(); +#endif + } +#endif /* CONFIG_TRACE */ + return 0; +} + +#ifdef __i386__ + +void mcount(void) +{ + int do_oops; +#ifdef CONFIG_TRACE + unsigned long flags; +#endif + if (sysctl_disable_mcount || atomic_read(&mcount_ready) <= 0) + return; + +#ifdef CONFIG_TRACE + if (atomic_read(&mcount_trace_ready) <= 0) + return; +#endif + + if (current->flags & PF_NO_MCOUNT) + return; + + LOCK_MCOUNT_TRACE(flags); + do_oops = mcount_internal((profiler_pc_t)__builtin_return_address(0)); + UNLOCK_MCOUNT_TRACE(flags); + + /* Do oops with mcount_ready and trace_table_lock in a clean state */ + if (do_oops) + *(char *)0=0; +} + +#ifdef CONFIG_MODULES +EXPORT_SYMBOL_NOVERS(mcount); +#endif + +#endif /* __i386__ */ + +#endif /* CONFIG_DEBUG_MCOUNT */ Index: linux/mm/Makefile diff -u linux/mm/Makefile:1.1.1.1 linux/mm/Makefile:1.1.3.1 --- linux/mm/Makefile:1.1.1.1 Mon Jan 18 02:27:01 1999 +++ linux/mm/Makefile Sun Jan 24 20:13:09 1999 @@ -12,4 +12,8 @@ vmalloc.o slab.o \ swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o +ifeq ($(CONFIG_MEMLEAK),y) +O_OBJS += memleak.o +endif + include $(TOPDIR)/Rules.make Index: linux/mm/memleak.c 
diff -u /dev/null linux/mm/memleak.c:1.1.3.1 --- /dev/null Sat Apr 24 02:50:30 1999 +++ linux/mm/memleak.c Sun Jan 24 20:13:10 1999 @@ -0,0 +1,265 @@ +/* + * linux/mm/memleak.c, memory leak detector + * + * Copyright (C) 1997 Ingo Molnar + * + * Maintainer: Mike Galbraith mikeg@weiden.de + * + * Changelog: + * Dec 18 1997: memleak.c - added wrapper functions for slab.c + * Dec 18 1997: include/asm-i386/pgtable.h + * converted pte_alloc_kernel, pte_alloc, pgd_alloc to macros + */ + +#include +#include +#include +#include +#include +#include + +/* + * Design: + * + * Every 32 bytes block of memory in the system has an 'allocation map entry'. + * A map entry is a two-fields bitmap, an 'ID' pointing to the allocator, + * and a ~2 seconds granularity timestamp to see object age. + * ID 0 means the block is empty. The allocation map is a very big static + * array, and is preallocated at boot, and put at the end of memory. + * + * This method relies on the fact that no object allocated in Linux + * is smaller than 32 bytes. True that we waste ~10% memory, but the + * method is very simple, extremely fast and generic. There are lots of + * systems that can tolerate 10% less memory, but almost no system + * can tolerate the CPU load caused by O(N) or O(log(N)) 'bookeeping + * algorithms' when allocating memory in a RL system. + * + * This method is a O(1) algorithm. + * + * + * Currently wrapped allocators: + * + * generic page allocator: get_free_pages()/free_pages() + * kernel allocator: kmalloc()/kfree()/kfree_s() + * kmem_cache_create()/kmem_cache_shrink() + * kmem_cache_alloc()/kmem_cache_free() + * networking allocator: skb_alloc()/skb_free() + * + * vmalloc()/vfree() will probably never be supported by + * this method, maybe we can represent them through their + * first physical page. It's not a common allocation + * method. 
+ */ + +#define MIN_MEMBLOCK_SIZE 32 + +#define IDX(addr) (__pa(addr)/MIN_MEMBLOCK_SIZE) + +/* + * We want to keep the allocation map as small as possible + */ +#define ID_BITS 10 +#define MAX_ID (1<", 0 }; +static unsigned int curr_id = 0; +spinlock_t memleak_alloc_lock = SPIN_LOCK_UNLOCKED; + +int alloc_addr_lock(unsigned long addr, struct alloc_struct * id) +{ + unsigned long flags, idx; + + if(!curr_id || !id) /* don't do anything if turned off */ + return 0; + + idx = IDX(addr); + + if (idx > ENTRIES) { + PROBLEM(); + return -1; + } + + spin_lock_irqsave(&memleak_alloc_lock, flags); + + /* If someone else registered ID while I was aquiring, take shortcut + * and only make the alloc_map entry. + */ + if(id->id) + goto alloc_ok; + else + { +#if 0 + printk("allocating ID.%d for %s:%d.\n",curr_id, id->file, id->line); +#endif + id->id = curr_id; + curr_id++; + if (curr_id == MAX_ID) { + printk("ID wrapped around, stopping ID allocation.\n"); + printk("Increase ID_BITS in memleak.c.\n"); + curr_id = 0; + } else + id_map[curr_id-1] = id; + } +alloc_ok: + alloc_map[idx].id = id->id; + alloc_map[idx].timestamp = jiffies>>ID_BITS; + spin_unlock_irqrestore(&memleak_alloc_lock, flags); + return 0; +} + +int alloc_addr_nolock(unsigned long addr, struct alloc_struct * id) +{ + unsigned long idx; + + if(!curr_id || !id ) /* don't do anything if turned off */ + return 0; + + idx = IDX(addr); + + if (idx > ENTRIES) { + PROBLEM(); + return -1; + } + + /* If someone else has already registered ID, take shortcut + * and only make the alloc_map entry. 
+ */ + if(id->id) + goto alloc_ok; +#if 0 + printk("allocating ID.%d for %s:%d.\n",curr_id, id->file, id->line); +#endif + id->id = curr_id; + curr_id++; + if (curr_id == MAX_ID) { + printk("ID wrapped around, stopping ID allocation.\n"); + printk("Increase ID_BITS in memleak.c.\n"); + curr_id = 0; + } else + id_map[curr_id-1] = id; +alloc_ok: + alloc_map[idx].id = id->id; + alloc_map[idx].timestamp = jiffies>>ID_BITS; + return 0; +} + +int free_addr(unsigned long addr) +{ + unsigned idx; + + if(!curr_id) + return 0; + + idx = IDX(addr); + + if (idx > ENTRIES) { + PROBLEM(); + return -1; + } + + alloc_map[idx].id = 0; + return 0; +} + +/* + * We put the alloc table at the end of physical memory + */ +unsigned long memleak_init (unsigned long start_mem, unsigned long end_mem) +{ + unsigned long MEMSIZE, size; + + id_map[0] = &NULL_id; + + end_mem = PAGE_ALIGN(end_mem); + + MEMSIZE = end_mem-PAGE_OFFSET; + ENTRIES = MEMSIZE/(MIN_MEMBLOCK_SIZE+sizeof(struct alloc_entry))+1; + + size = ENTRIES * sizeof(struct alloc_entry); + + end_mem = PAGE_ALIGN(end_mem-size)-PAGE_SIZE; + + alloc_map = (struct alloc_entry *) end_mem; + + printk("MEMLEAK, allocating %ld KB allocation map.\n", size/1024); + + if (!alloc_map) { + PROBLEM(); + for(;;); + } + + memset(alloc_map,0,size); + curr_id = 1; + + return end_mem; +} + + +#define LINE_SIZE 128 + +static ssize_t read_allocations (struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + struct alloc_struct * id; + unsigned long p; + unsigned long idx; + unsigned long timestamp; + + char line [LINE_SIZE]; + + if(ppos != &file->f_pos) + return -ESPIPE; + + p = idx = *ppos; + + if (count < 0) + return -EINVAL; +repeat: + if (idx >= ENTRIES) + return 0; + + if (alloc_map[idx].id) { + id = id_map[alloc_map[idx].id]; + timestamp = (alloc_map[idx].timestamp< %s:%d (%ld)\n", + (void *)(idx*MIN_MEMBLOCK_SIZE),id->file,id->line,timestamp); + copy_to_user(buf,line,count); + } else { + if (!idx) { + count = sprintf(line,"<%p> 
jiffies.c:%d (%ld)\n", NULL, 0, jiffies/HZ); + copy_to_user(buf,line,count); + } else { + idx++; + *ppos = idx; + /* + * there is at least one allocation in the system + */ + goto repeat; + } + } + + idx++; + *ppos = idx; + + return count; +} + +static struct file_operations proc_memleak_operations = { + NULL, /* lseek */ + read_allocations, + NULL, /* write */ +}; + +struct inode_operations proc_memleak_inode_operations = { + &proc_memleak_operations, +}; Index: linux/mm/page_alloc.c diff -u linux/mm/page_alloc.c:1.1.1.3 linux/mm/page_alloc.c:1.1.3.2 --- linux/mm/page_alloc.c:1.1.1.3 Tue Jan 26 19:32:27 1999 +++ linux/mm/page_alloc.c Sun Jan 31 03:55:28 1999 @@ -5,6 +5,7 @@ * Swap reorganised 29.12.95, Stephen Tweedie */ +#define MEMLEAK_PASS_ALLOCATION #include #include #include @@ -101,6 +102,7 @@ #define list(x) (mem_map+(x)) + MEMLEAK_FREE((PAGE_OFFSET + ((map_nr) << PAGE_SHIFT))); /*ADDRESS macro below*/ map_nr &= mask; nr_free_pages -= mask; while (mask + (1 << (NR_MEM_LISTS-1))) { @@ -167,6 +169,7 @@ MARK_USED(map_nr, new_order, area); \ nr_free_pages -= 1 << order; \ EXPAND(ret, map_nr, order, new_order, area); \ + MEMLEAK_ALLOC_NOLOCK(ADDRESS(map_nr)); \ spin_unlock_irqrestore(&page_alloc_lock, flags); \ return ADDRESS(map_nr); \ } \ @@ -191,7 +194,11 @@ int low_on_memory = 0; +#ifndef CONFIG_MEMLEAK unsigned long __get_free_pages(int gfp_mask, unsigned long order) +#else +unsigned long __get_free_pages_wrap(int gfp_mask, unsigned long order, struct alloc_struct *IDPTR) +#endif { unsigned long flags; Index: linux/mm/slab.c diff -u linux/mm/slab.c:1.1.1.1 linux/mm/slab.c:1.1.3.1 --- linux/mm/slab.c:1.1.1.1 Mon Jan 18 02:27:02 1999 +++ linux/mm/slab.c Sun Jan 24 20:13:10 1999 @@ -100,10 +100,19 @@ * is less than 512 (PAGE_SIZE<<3), but greater than 256. 
*/ +#define MEMLEAK_PASS_ALLOCATION +#define MEMLEAK_UNWRAP_SLAB + #include #include #include #include +#include +#include + +#include +#include +#include /* If there is a different PAGE_SIZE around, and it works with this allocator, * then change the following. @@ -676,9 +685,15 @@ * NOTE: The 'name' is assumed to be memory that is _not_ going to disappear. */ kmem_cache_t * +#ifndef CONFIG_MEMLEAK kmem_cache_create(const char *name, size_t size, size_t offset, unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long), void (*dtor)(void*, kmem_cache_t *, unsigned long)) +#else +kmem_cache_create_wrap(const char *name, size_t size, size_t offset, + unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long), + void (*dtor)(void*, kmem_cache_t *, unsigned long), struct alloc_struct *IDPTR) +#endif { const char *func_nm= KERN_ERR "kmem_create: "; kmem_cache_t *searchp; @@ -1038,13 +1053,18 @@ ret = 1; if (cachep->c_lastp == kmem_slab_end(cachep)) ret--; /* Cache is empty. */ + MEMLEAK_FREE_TRUE((ret == 0),cachep); spin_unlock_irq(&cachep->c_spinlock); return ret; } /* Get the memory for a slab management obj. */ static inline kmem_slab_t * +#ifndef CONFIG_MEMLEAK kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp, int local_flags) +#else +kmem_cache_slabmgmt_wrap(kmem_cache_t *cachep, void *objp, int local_flags, struct alloc_struct *IDPTR) +#endif { kmem_slab_t *slabp; @@ -1132,7 +1152,11 @@ * kmem_cache_alloc() when there are no active objs left in a cache. */ static int +#ifndef CONFIG_MEMLEAK kmem_cache_grow(kmem_cache_t * cachep, int flags) +#else +kmem_cache_grow_wrap(kmem_cache_t * cachep, int flags, struct alloc_struct *IDPTR) +#endif { kmem_slab_t *slabp; struct page *page; @@ -1342,7 +1366,11 @@ /* Returns a ptr to an obj in the given cache. 
*/ static inline void * +#ifndef CONFIG_MEMLEAK __kmem_cache_alloc(kmem_cache_t *cachep, int flags) +#else +__kmem_cache_alloc_wrap(kmem_cache_t *cachep, int flags, struct alloc_struct *IDPTR) +#endif { kmem_slab_t *slabp; kmem_bufctl_t *bufp; @@ -1380,6 +1408,7 @@ * obj has been removed from the slab. Should be safe to drop * the lock here. */ + MEMLEAK_ALLOC_NOLOCK(objp); spin_unlock_irqrestore(&cachep->c_spinlock, save_flags); #if SLAB_DEBUG_SUPPORT if (cachep->c_flags & SLAB_RED_ZONE) @@ -1512,6 +1541,7 @@ kmem_poison_obj(cachep, objp); } #endif /* SLAB_DEBUG_SUPPORT */ + MEMLEAK_FREE(objp); spin_unlock_irqrestore(&cachep->c_spinlock, save_flags); return; } @@ -1587,7 +1617,11 @@ } void * +#ifndef CONFIG_MEMLEAK kmem_cache_alloc(kmem_cache_t *cachep, int flags) +#else +kmem_cache_alloc_wrap(kmem_cache_t *cachep, int flags, struct alloc_struct *IDPTR) +#endif { return __kmem_cache_alloc(cachep, flags); } @@ -1599,7 +1633,11 @@ } void * +#ifndef CONFIG_MEMLEAK kmalloc(size_t size, int flags) +#else +kmalloc_wrap(size_t size, int flags, struct alloc_struct *IDPTR) +#endif { cache_sizes_t *csizep = cache_sizes; Index: linux/mm/vmalloc.c diff -u linux/mm/vmalloc.c:1.1.1.2 linux/mm/vmalloc.c:1.1.3.1 --- linux/mm/vmalloc.c:1.1.1.2 Sat Jan 23 19:52:32 1999 +++ linux/mm/vmalloc.c Sun Jan 24 20:13:10 1999 @@ -4,8 +4,14 @@ * Copyright (C) 1993 Linus Torvalds */ +#define MEMLEAK_PASS_ALLOCATION +#define MEMLEAK_UNWRAP_VMALLOC +#define MEMLEAK_UNWRAP_SLAB + +#include #include #include +#include #include @@ -82,7 +88,12 @@ flush_tlb_all(); } +#ifndef CONFIG_MEMLEAK static inline int alloc_area_pte(pte_t * pte, unsigned long address, unsigned long size) +#else +static inline int alloc_area_pte_wrap(pte_t * pte, unsigned long address, + unsigned long size, struct alloc_struct *IDPTR) +#endif { unsigned long end; @@ -104,7 +115,12 @@ return 0; } +#ifndef CONFIG_MEMLEAK static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size) +#else +static inline 
int alloc_area_pmd_wrap(pmd_t * pmd, unsigned long address, + unsigned long size, struct alloc_struct *IDPTR) +#endif { unsigned long end; @@ -124,7 +140,12 @@ return 0; } +#ifndef CONFIG_MEMLEAK int vmalloc_area_pages(unsigned long address, unsigned long size) +#else +int vmalloc_area_pages_wrap(unsigned long address, unsigned long size, + struct alloc_struct *IDPTR ) +#endif { pgd_t * dir; unsigned long end = address + size; @@ -149,7 +170,11 @@ return 0; } +#ifndef CONFIG_MEMLEAK struct vm_struct * get_vm_area(unsigned long size) +#else +struct vm_struct * get_vm_area_wrap(unsigned long size, struct alloc_struct *IDPTR) +#endif { unsigned long addr; struct vm_struct **p, *tmp, *area; @@ -195,7 +220,11 @@ printk("Trying to vfree() nonexistent vm area (%p)\n", addr); } +#ifndef CONFIG_MEMLEAK void * vmalloc(unsigned long size) +#else +void * vmalloc_wrap(unsigned long size, struct alloc_struct *IDPTR) +#endif { void * addr; struct vm_struct *area; Index: linux/net/netsyms.c diff -u linux/net/netsyms.c:1.1.1.3 linux/net/netsyms.c:1.1.3.3 --- linux/net/netsyms.c:1.1.1.3 Wed Mar 24 01:53:27 1999 +++ linux/net/netsyms.c Thu Mar 25 02:27:26 1999 @@ -115,10 +115,15 @@ EXPORT_SYMBOL(sock_getsockopt); EXPORT_SYMBOL(sock_sendmsg); EXPORT_SYMBOL(sock_recvmsg); -EXPORT_SYMBOL(sk_alloc); EXPORT_SYMBOL(sk_free); EXPORT_SYMBOL(sock_wake_async); +#ifndef CONFIG_MEMLEAK +EXPORT_SYMBOL(sk_alloc); EXPORT_SYMBOL(sock_alloc_send_skb); +#else +EXPORT_SYMBOL(sk_alloc_wrap); +EXPORT_SYMBOL(sock_alloc_send_skb_wrap); +#endif EXPORT_SYMBOL(sock_init_data); EXPORT_SYMBOL(sock_no_dup); EXPORT_SYMBOL(sock_no_release); @@ -138,18 +143,26 @@ EXPORT_SYMBOL(sock_no_recvmsg); EXPORT_SYMBOL(sock_rfree); EXPORT_SYMBOL(sock_wfree); +#ifndef CONFIG_MEMLEAK EXPORT_SYMBOL(sock_wmalloc); EXPORT_SYMBOL(sock_rmalloc); +#else +EXPORT_SYMBOL(sock_wmalloc_wrap); +EXPORT_SYMBOL(sock_rmalloc_wrap); +#endif EXPORT_SYMBOL(sock_rspace); EXPORT_SYMBOL(skb_recv_datagram); EXPORT_SYMBOL(skb_free_datagram); 
EXPORT_SYMBOL(skb_copy_datagram); EXPORT_SYMBOL(skb_copy_datagram_iovec); -EXPORT_SYMBOL(skb_realloc_headroom); EXPORT_SYMBOL(datagram_poll); EXPORT_SYMBOL(put_cmsg); EXPORT_SYMBOL(net_families); +#ifndef CONFIG_MEMLEAK EXPORT_SYMBOL(sock_kmalloc); +#else +EXPORT_SYMBOL(sock_kmalloc_wrap); +#endif EXPORT_SYMBOL(sock_kfree_s); EXPORT_SYMBOL(skb_queue_lock); @@ -443,10 +456,18 @@ EXPORT_SYMBOL(fddi_setup); #endif /* CONFIG_FDDI */ EXPORT_SYMBOL(eth_copy_and_sum); -EXPORT_SYMBOL(alloc_skb); EXPORT_SYMBOL(__kfree_skb); +#ifndef CONFIG_MEMLEAK +EXPORT_SYMBOL(alloc_skb); EXPORT_SYMBOL(skb_clone); EXPORT_SYMBOL(skb_copy); +EXPORT_SYMBOL(skb_realloc_headroom); +#else +EXPORT_SYMBOL(alloc_skb_wrap); +EXPORT_SYMBOL(skb_clone_wrap); +EXPORT_SYMBOL(skb_copy_wrap); +EXPORT_SYMBOL(skb_realloc_headroom_wrap); +#endif EXPORT_SYMBOL(netif_rx); EXPORT_SYMBOL(dev_add_pack); EXPORT_SYMBOL(dev_remove_pack); Index: linux/net/core/skbuff.c diff -u linux/net/core/skbuff.c:1.1.1.3 linux/net/core/skbuff.c:1.1.3.3 --- linux/net/core/skbuff.c:1.1.1.3 Tue Mar 9 01:56:01 1999 +++ linux/net/core/skbuff.c Fri Mar 19 01:23:28 1999 @@ -36,6 +36,10 @@ * The functions in this file will not compile correctly with gcc 2.4.x */ +#define MEMLEAK_PASS_ALLOCATION +#define MEMLEAK_UNWRAP_SKBUFF +#define MEMLEAK_UNWRAP_SLAB + #include #include #include @@ -61,6 +65,8 @@ #include #include +#include + /* * Skb list spinlock */ @@ -116,7 +122,12 @@ * */ +#ifndef CONFIG_MEMLEAK struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) +#else +struct sk_buff *alloc_skb_wrap(unsigned int size,int gfp_mask, + struct alloc_struct * IDPTR) +#endif { struct sk_buff *skb; u8 *data; @@ -232,7 +243,12 @@ * Duplicate an sk_buff. The new one is not owned by a socket. 
*/ +#ifndef CONFIG_MEMLEAK struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) +#else +struct sk_buff *skb_clone_wrap(struct sk_buff *skb, int gfp_mask, + struct alloc_struct * IDPTR) +#endif { struct sk_buff *n; @@ -261,7 +277,12 @@ * This is slower, and copies the whole data area */ +#ifndef CONFIG_MEMLEAK struct sk_buff *skb_copy(struct sk_buff *skb, int gfp_mask) +#else +struct sk_buff *skb_copy_wrap(struct sk_buff *skb, int gfp_mask, + struct alloc_struct * IDPTR) +#endif { struct sk_buff *n; unsigned long offset; @@ -310,7 +331,12 @@ return n; } +#ifndef CONFIG_MEMLEAK struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom) +#else +struct sk_buff *skb_realloc_headroom_wrap(struct sk_buff *skb, + int newheadroom, struct alloc_struct * IDPTR) +#endif { struct sk_buff *n; unsigned long offset; Index: linux/net/core/sock.c diff -u linux/net/core/sock.c:1.1.1.5 linux/net/core/sock.c:1.1.3.5 --- linux/net/core/sock.c:1.1.1.5 Mon Mar 29 23:41:36 1999 +++ linux/net/core/sock.c Tue Mar 30 20:11:16 1999 @@ -89,6 +89,11 @@ * 2 of the License, or (at your option) any later version. */ +#define MEMLEAK_PASS_ALLOCATION +#define MEMLEAK_UNWRAP_SOCK +#define MEMLEAK_UNWRAP_SKBUFF +#define MEMLEAK_UNWRAP_SLAB + #include #include #include @@ -130,6 +135,8 @@ #include #endif +#include + #define min(a,b) ((a)<(b)?(a):(b)) /* Run time adjustable parameters. */ @@ -483,7 +490,11 @@ * usage. */ +#ifndef CONFIG_MEMLEAK struct sock *sk_alloc(int family, int priority, int zero_it) +#else +struct sock *sk_alloc_wrap(int family, int priority, int zero_it, struct alloc_struct *IDPTR) +#endif { struct sock *sk = kmem_cache_alloc(sk_cachep, priority); @@ -553,10 +564,14 @@ } +#ifndef CONFIG_MEMLEAK /* * Allocate a skb from the socket's send buffer. 
*/ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority) +#else +struct sk_buff *sock_wmalloc_wrap(struct sock *sk, unsigned long size, int force, int priority, struct alloc_struct *IDPTR) +#endif { if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) { struct sk_buff * skb = alloc_skb(size, priority); @@ -570,10 +585,14 @@ return NULL; } +#ifndef CONFIG_MEMLEAK /* * Allocate a skb from the socket's receive buffer. */ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority) +#else +struct sk_buff *sock_rmalloc_wrap(struct sock *sk, unsigned long size, int force, int priority, struct alloc_struct *IDPTR) +#endif { if (force || atomic_read(&sk->rmem_alloc) < sk->rcvbuf) { struct sk_buff *skb = alloc_skb(size, priority); @@ -587,10 +606,14 @@ return NULL; } +#ifndef CONFIG_MEMLEAK /* * Allocate a memory block from the socket's option memory buffer. */ void *sock_kmalloc(struct sock *sk, int size, int priority) +#else +void *sock_kmalloc_wrap(struct sock *sk, int size, int priority, struct alloc_struct *IDPTR) +#endif { if (atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) { void *mem; @@ -675,8 +698,13 @@ * Generic send/receive buffer handlers */ +#ifndef CONFIG_MEMLEAK struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, unsigned long fallback, int noblock, int *errcode) +#else +struct sk_buff *sock_alloc_send_skb_wrap(struct sock *sk, unsigned long size, + unsigned long fallback, int noblock, int *errcode, struct alloc_struct *IDPTR) +#endif { int err; struct sk_buff *skb; Index: linux/scripts/Makefile diff -u linux/scripts/Makefile:1.1.1.1 linux/scripts/Makefile:1.1.3.1 --- linux/scripts/Makefile:1.1.1.1 Mon Jan 18 02:29:25 1999 +++ linux/scripts/Makefile Sun Jan 24 20:13:54 1999 @@ -1,6 +1,20 @@ HEADER=header.tk TAIL=tail.tk +# +# include dependency files they exist +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif + +# +# Routines in this directory are 
external to the kernel but partake of the +# kernel namespace. Since they are external, they are not candidates for +# profiling. +# +override CFLAGS := $(CFLAGS:%-pg=%-g -c) + # Previous versions always remade kconfig.tk because they always depended # on soundscript. This runs fairly fast, and I can't find all the # Config.in files to depend on anyways. So I'll force it to remake. @@ -33,7 +47,11 @@ tkparse.o tkcond.o tkgen.o: $(HOSTCC) $(HOSTCFLAGS) -c -o $@ $(@:.o=.c) +ktrace: ktrace.o + $(CC) -o ktrace ktrace.o + clean: rm -f *~ kconfig.tk *.o tkparse mkdep split-include + rm -f ktrace include $(TOPDIR)/Rules.make Index: linux/scripts/ktrace.c diff -u /dev/null linux/scripts/ktrace.c:1.1.3.1 --- /dev/null Sat Apr 24 02:50:45 1999 +++ linux/scripts/ktrace.c Sun Jan 24 20:13:56 1999 @@ -0,0 +1,481 @@ +/* ktrace.c + * + * Read /proc/trace and System.map (or equivalent) and print the trace entries. + * Prints the time taken between trace calls, "(????)" if the next entry for the + * current processor cannot be found. Prints the current pid, if the next entry + * for the current processor is for a different pid, prints "pid(old->new)". + * If compiled for SMP, the trace table contains the logical processor number, + * this is printed as "cpu(n)". + * + * The System.map can be the standard System.map for the kernel, in which case + * module traces will not resolve very well. It can be a merged System.map + * containing module entries as well, see make_System_map.pl for an example, + * ftp://ftp.ocs.com.au/pub/. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_TRACE +#include + +/* + * Dumbomarbitrary limits + */ + +#define LINE_LIMIT 100 +#define SYSMAP_LIMIT 10000 + +static struct trace_table * tt; + +struct sysmap_entry { + profiler_pc_t pc; + char * name; +}; + +static struct sysmap_entry sysmap_table [SYSMAP_LIMIT]; + +static int sysmap_entries = 0; +static int default_speed = 150, speed, lock = 0; +static char *default_map = "/usr/src/linux/System.map", *map, *dump; +static char *prog_name; + +static void usage(void) +{ + fprintf(stderr, "usage: %s\n", prog_name); + fprintf(stderr, "\t[--speed MHz] [-s MHz]\t\t\thow fast is your processor?\n"); + fprintf(stderr, "\t[--map System.map] [-m System.map]\twhere is your system map?\n"); + fprintf(stderr, "\t[--lock] [-l]\t\t\t\twait for the lock on /proc/trace\n"); + fprintf(stderr, "\t[--dump filename] [-d filename]\t\tread trace dump from filename\n"); + fprintf(stderr, "Default --speed is %d\n", default_speed); + fprintf(stderr, "Default --map is %s\n", default_map); + exit(1); +} + +static void read_sysmap (void) +{ + profiler_pc_t pc; + char type; + int i, len; + + FILE * file; + char str [LINE_LIMIT+1]; + + file=fopen(map, "r"); + + if (!file) { + fprintf(stderr,"System.map '%s' missing.\n", map); + usage(); + } + + sysmap_table[0].pc = 0; + sysmap_table[0].name = "
\n"; + + sysmap_entries=1; + + while (fscanf(file, "%lx %1c", &pc, &type) == 2) { + i=sysmap_entries++; + if (!fgets(str, LINE_LIMIT, file)) { + perror("ouch, System.map format error.\n"); + exit(-1); + } + sysmap_table[i].pc = pc; + sysmap_table[i].name = malloc(LINE_LIMIT); + if (!sysmap_table[i].name) { + perror("ouch, outta mem.\n"); + exit(-1); + } + /* + * Dirty trick to strip off end of line: + */ + len = strlen(str); + str[len-1]=0; + strcpy (sysmap_table[i].name, str); + } + + printf("read %d lines from System.map.\n", sysmap_entries-1); + + sysmap_table[sysmap_entries].pc = ~1; + sysmap_table[sysmap_entries].name = "
\n"; + sysmap_entries++; + + /* To be sure, to be sure :). */ + sysmap_table[sysmap_entries].pc = ~0; + sysmap_table[sysmap_entries++].name = ""; + sysmap_table[sysmap_entries].pc = ~0; + sysmap_table[sysmap_entries++].name = ""; + +/* + * for (i=0; i1) { + middle = first+(last-first)/2; + if (sysmap_table[middle].pc <= pc) + first = middle; + else + last = middle; + } + + return first; +} + +/* The trace table is a ring buffer. Convert 0 <= index < size to the + * corresponding entry, with wraparound as necessary. + */ +static inline int ring(int x) +{ + return ((x) % CONFIG_TRACE_SIZE); +} + +#if defined(CONFIG_TRACE_CPU) && (defined(__SMP__) || defined(CONFIG_SMP)) +#define CPU_PRESENT 1 +#else +#define CPU_PRESENT 0 +#endif + +static ssize_t read_dump(int fd, void *buf, size_t count) +{ + /* Find the start of the hex dump of /proc/trace, read + * and convert hex digits, storing in buf. Any garbage + * nibbles are silently ignored and treated as '0'. + */ + char line[BUFSIZ]; + int start = 0, value; + char *pline, c; + unsigned char *pbuf; + FILE *f = fdopen(fd, "r"); + if (!f) { + perror("read_dump fdopen failed"); + exit(-1); + } + pbuf = (unsigned char *) buf; + while (fgets(line, sizeof(line), f)) { + if (ferror(f)) { + perror("read_dump ferror detected"); + exit(-1); + } + if (strstr(line, "DAL: ktrace start")) { + start = 1; + continue; + } + if (start) { + if (strstr(line, "DAL: ktrace end")) + break; + pline = line; + while (*pline) { + while (*pline == '\r' || *pline == '\n') + ++pline; + if (!(c = *pline++)) + break; + value = 0; + if (c >= '0' && c <= '9') + value = c - '0'; + else if (c >= 'a' && c <= 'f') + value = c - 'a' + 10; + value <<= 4; + if (!(c = *pline++)) + break; + if (c >= '0' && c <= '9') + value += c - '0'; + else if (c >= 'a' && c <= 'f') + value += c - 'a' + 10; + if (count > 0) { + --count; + *(pbuf++) = (unsigned char) value; + } + if (count == 0) + break; + } + } + } + return(pbuf - (unsigned char *)buf); +} + +static void 
read_proc_info (void) +{ + int bytes, calibrate; + int i, j; +#ifdef CONFIG_TRACE_TIMESTAMP + profiler_timestamp_t min_latency; +#endif + struct trace_entry *tep1 = NULL, *tep2 = NULL; + + char *filename = "/proc/trace"; + int file; + + if (dump) + filename = dump; + + file=open(filename, O_RDONLY); + + if (!file) { + char message[BUFSIZ]; + sprintf(message, "%s missing\n", filename); + perror(message); + exit(-1); + } + if (lock && !dump && flock(file, LOCK_EX)) { + char message[BUFSIZ]; + sprintf(message, "Cannot get exclusive lock on %s\n", filename); + perror(message); + exit(-1); + } + + tt=(struct trace_table *)malloc(sizeof(*trace_table)); + + if (dump) { + printf("Reading dumped /proc/trace from %s ...", dump); + fflush(stdout); + bytes = read_dump(file, tt, sizeof(*trace_table)); + printf(" done\n"); + fflush(stdout); + } + else + bytes = read(file, tt, sizeof(*trace_table)); + + if (sizeof(*trace_table) != bytes) { + printf("something went wrong, bytes read: %d, tried: %d.\n", bytes, sizeof(*trace_table)); + exit(-1); + } + + if (lock && !dump && flock(file, LOCK_UN)) { + char message[BUFSIZ]; + sprintf(message, "Release lock on %s failed\n", filename); + perror(message); + } + + /* + * Pass 1: look for ~0 which signals calibration latencies. + * Since read_trace (fs/proc/array.c) locks the table and turns + * off mcount processing, the calibration entries should be the + * current entry and the previous TRACE_CALIBRATION_CALLS-1. + */ +#define FIRST_CALIBRATE (tt->curr_call-(TRACE_CALIBRATION_CALLS-1)) + +#ifdef CONFIG_TRACE_TIMESTAMP + min_latency = ~0; +#endif + calibrate = 0; + + if (!dump) { + /* look for read_trace in 200 entries before FIRST_CALIBRATE. + * 200 is arbitrary, normally read_trace is immediately before + * the first calibration but there is a small window between + * read_trace starting and tracing being suspended, other cpu's + * and/or interrupts can appear in that window. 
KAO + */ + for (j = 1; j <= 200; ++j) { + tep1 = &(tt->entries[ring(FIRST_CALIBRATE-j)]); + i = match_pc(tep1->pc); + if (!strcmp(sysmap_table[i].name," read_trace")) + break; + } + if (strcmp(sysmap_table[i].name," read_trace")) { + tep1 = &(tt->entries[ring(FIRST_CALIBRATE-1)]); + i = match_pc(tep1->pc); + fprintf(stderr, + "hmm, no 'read_trace', possibly wrong System.map?.\npc %lx proc %s\n", + tep1->pc, sysmap_table[i].name); + } + } + + for (i = FIRST_CALIBRATE; i < tt->curr_call; i++) { + tep1 = &(tt->entries[ring(i)]); + tep2 = &(tt->entries[ring(i+1)]); + if (tep1->pc == ~0 && tep2->pc == ~0) { +#ifdef CONFIG_TRACE_TIMESTAMP + profiler_timestamp_t delta; + delta = tep2->timestamp - tep1->timestamp; + if (delta < min_latency) + min_latency=delta; +#endif /* CONFIG_TRACE_TIMESTAMP */ + ++calibrate; + } + } + + if (calibrate != TRACE_CALIBRATION_CALLS-1) { + fprintf(stderr,"huh, incorrect number of calibration entries found (%d)?.\n", calibrate); +#ifdef CONFIG_TRACE_TIMESTAMP + fprintf(stderr,"using 0.39 usecs.\n"); + min_latency = 0.39*speed; + } else { + printf("calibration done, estimated measurement latency: %3.2f microseconds.\n", min_latency/(double)speed); + if (min_latency == 0) { + printf("Warning: latency is zero, does your cpu really support timestamps?\n"); + } + else + min_latency -= 10; +#endif /* CONFIG_TRACE_TIMESTAMP */ + } + printf("\n"); + + + /* Pass 2. 
*/ + + for (i = 1; i <= CONFIG_TRACE_SIZE; i++) { + unsigned int idx; +#ifdef CONFIG_TRACE_TIMESTAMP + profiler_timestamp_t delta = -1; +#endif /* CONFIG_TRACE_TIMESTAMP */ + + tep1 = &(tt->entries[ring(tt->curr_call+i)]); + if (tep1->pc == 0) + continue; /* trace table has been cleared */ +#ifdef CONFIG_TRACE_TIMESTAMP +#if CPU_PRESENT + for (j = 1; j <= CONFIG_TRACE_SIZE-i; ++j) { + tep2 = &(tt->entries[ring(tt->curr_call+i+j)]); + if (tep2->pc == 0) + break; + if (tep1->cpu == tep2->cpu) { + delta = tep2->timestamp - tep1->timestamp; + break; + } + } +#else /* CPU_PRESENT */ + tep2 = &(tt->entries[ring(tt->curr_call+i+1)]); + if (tep2->pc != 0 && i < CONFIG_TRACE_SIZE) + delta = tep2->timestamp - tep1->timestamp; +#endif /* CPU_PRESENT */ +#endif /* CONFIG_TRACE_TIMESTAMP */ + + idx = match_pc(tep1->pc); + +#if 0 /* testing only */ +#ifdef CONFIG_TRACE_TIMESTAMP +#ifdef CONFIG_TRACE_TRUNCTIME + printf("%08x ", tep1->timestamp); +#else + printf("%08llx%08llx ", tep1->timestamp >> 32, + tep1->timestamp & 0xffffffff); +#endif +#endif /* CONFIG_TRACE_TIMESTAMP */ +#endif + printf("%08lx %s +<%lx/%lx>", + tep1->pc, + sysmap_table[idx].name, + tep1->pc-sysmap_table[idx].pc, + sysmap_table[idx+1].pc - sysmap_table[idx].pc); +#ifdef CONFIG_TRACE_TIMESTAMP + if (delta == -1) + printf(" (????)"); + else if (tep1->pc == ~0) + printf(" (%3.08f raw)", + (double)delta); + else + printf(" (%3.02f)", + (delta-min_latency)/(double)speed); +#endif /* CONFIG_TRACE_TIMESTAMP */ +#if CPU_PRESENT + printf(" cpu(%d)", tep1->cpu); +#endif +#ifdef CONFIG_TRACE_PID + if (tep1->pid == tep2->pid) + printf(" pid(%d)", tep1->pid); + else + printf(" pid(%d->%d)", tep1->pid, tep2->pid); +#endif /* CONFIG_TRACE_PID */ + printf("\n"); + } + + free(tt); + close(file); + + printf("\n"); +} + +int main(int argc, char * * argv) +{ + int c, option_index = 0; + char *endptr; + struct option long_options[] = { + {"speed", 1, 0, 's'}, + {"map", 1, 0, 'm'}, + {"lock", 0, 0, 'l'}, + {"dump", 1, 0, 'd'}, + 
{0, 0, 0, 0} + }; + + prog_name = argv[0]; + speed = default_speed; + map = default_map; + + while (1) { + c = getopt_long_only (argc, argv, "s:m:ld:", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 's': + speed = strtol(optarg, &endptr, 0); + if (*endptr) { + fprintf(stderr, "speed is not numeric '%s'\n", + optarg); + usage(); + } + if (speed < 0 || speed > 1000) { + fprintf(stderr, "speed must be 1-1000\n"); + usage(); + } + break; + + case 'm': + map = optarg; + break; + + case 'l': + lock = !lock; + break; + + case 'd': + dump = optarg; + break; + + case '?': + usage(); + exit(-1); + + default: + printf ("?? getopt returned character code 0%o '%c' ??\n", c, c); + } + } + + if (optind < argc) { + fprintf (stderr, "Unknown parameter '%s'\n", argv[optind]); + usage(); + exit(-1); + } + + printf("Speed: %d. Map: %s\n", speed, map); + + read_sysmap(); + read_proc_info(); + return 0; +} + +#else +#warning ktrace does nothing unless CONFIG_TRACE is set +int main(void) { return 0; } +#endif /* CONFIG_TRACE */ Index: linux/scripts/make_System_map.pl diff -u /dev/null linux/scripts/make_System_map.pl:1.1.3.1 --- /dev/null Sat Apr 24 02:50:45 1999 +++ linux/scripts/make_System_map.pl Sun Jan 24 20:13:56 1999 @@ -0,0 +1,324 @@ +#!/usr/bin/perl -w + +# A quick and dirty prototype which will be recoded in C later. I'd +# like people to hit on this and mail kaos@ocs.com.au with any bugs, +# not the kernel list please. Mon Sep 22 03:18:11 EST 1997 + +# Current oops processing suffers from some problems. +# * ksymoops is written in C++ (the only program in the kernel that +# is). +# * ksymoops only reads System.map so it has no idea where modules are. +# * klogd has not handled loaded modules on 2.1.x for some time (cannot +# seek /dev/kmem). +# * klogd cannot always decode the oops, some errors are so bad that +# klogd is effectively dead. 
+# * Even when klogd can run, /proc/ksyms only shows exported symbols +# for modules so klogd cannot resolve addresses for static procs in +# modules. In contrast, System.map shows all procs, both static and +# extern. +# * Decoding an oops on a machine other than the one it occurred on is +# awkward, due to assumptions about where files are located. + +# To overcome these problems, this program reads System.map or +# equivalent, /proc/ksyms or equivalent and the raw modules. It then +# outputs a new, dynamic System.map containing the symbols from the +# input map plus all the symbols from the currently loaded modules, +# adjusted to where the modules were loaded. This map can then be fed +# into your favourite oops decoder. + +# Typical invocation: +# +# make_System_map.pl /proc/ksyms /System.map \ +# /lib/modules/`uname -r`/*/*.o > /tmp/map +# /usr/src/linux/scripts/ksymoops /tmp/map < oops_log + +# Any filename ending in '.o' is assumed to be a module and is run +# through the nm command to get its symbols. Any file whose first line +# is 8 hex digits, space, any character, space is a system map, no +# matter what its file name. Any file whose first line is 8 hex +# digits, space, any character, non-space is ksyms output, no matter +# what its file name. This means that you can feed any System.map and +# /proc/ksyms or a copy of /proc/ksyms without worrying about the file +# names. You are in control, this program makes no guesses about where +# the various files are. + +# Known bugs (and it's only release 0.1 :). +# * If the same name appears more than once with different types, all +# but one is lost. + +require 5; +use strict; +use File::Basename; + +my $nm = "/usr/bin/nm -p"; # In case somebody moves nm +my $filename; +my $errors = 0; + +# Keyed on the name the module was loaded as (blank for kernel proper). +# Contains symbol (keyed) which contains offset. +my %ksym = (); + +# Keyed on symbol. Contains offset, type. +my %map = (); + +# Keyed on basename. 
Contains symbol (keyed) which contains offset, type. +my %module = (); + +# Keyed on the name the module was loaded as. Contains basename of the +# real module, adjustments for .text, .data, .rodata. +my %loaded_as = (); + +# Because you can "insmod -o xxx module.o", the module name in ksyms is +# not the real key. The only way (?) is to run all the modules and +# extract externally visible symbols then pick unique symbols for each +# module. Keyed on symbol, contains object basename. +my %unique = (); + +# Convert a data type from nm to the index of the section adjustment in +# loaded_as and the section name. +my %convert_type_to_section = ( + 'T' => [1, ".text"], + 't' => [1, ".text"], + 'D' => [2, ".data"], + 'd' => [2, ".data"], + 'R' => [3, ".rodata"], + 'r' => [3, ".rodata"], + ); + +foreach $filename (@ARGV) { + if ($filename =~ /\.o$/) { + # Skip for now + } + else { + open(INPUT, $filename) || die("Cannot open $filename for input - $!\n"); + my $line = ; + $line = "****** EMPTY FILE ******" if (!defined($line)); + if ($line =~ /^[0-9a-fA-F]{8} . 
/) { + read_system_map($filename, $line); + } + elsif ($line =~ /^[0-9a-fA-F]{8} .[^ ]/) { + read_ksyms($filename, $line); + } + else { + print STDERR <[0]; + $ksym_offset = $ksym_kernel{$_}; + if ($map_offset != $ksym_offset) { + if ($errors++ == 0) { + print STDERR "Mismatch detected between map and ksym\n"; + } + printf STDERR "Symbol %s, map %x, ksym %x\n", $_, $map_offset, $ksym_offset; + } + } + } + die("Please feed me a matching set of System.map and /proc/ksyms\n") if ($errors); +} + +if (keys(%ksym) == 1) { + print STDERR "No modules loaded, skipping module scan\n"; +} +else { + # Extract symbols from all modules + foreach $filename (@ARGV) { + if ($filename =~ /\.o$/) { + read_object($filename); + } + } + + # Pick the unique external symbols in all modules + { + my %really_unique = (); + foreach (keys(%unique)) { + if ($unique{$_} !~ / /) { + $really_unique{$_} = $unique{$_}; + } + } + %unique = %really_unique; + } + + # Resolve the name under which each module was loaded to the module + # basename and calculate adjustments for each loaded occurrence. + resolve_loaded_as_to_basename(); + + # For every module that was loaded, add all the symbols to the map + # with suitable adjustments. We have a complete map at last! + add_loaded_modules_to_map(); +} + +# And print. +{ + my $symbol; + my @address_order = (); + foreach $symbol (keys(%map)) { + push(@address_order, [ $map{$symbol}->[0], $map{$symbol}->[1], $symbol ]); + } + sub by_address { $a->[0] <=> $b->[0] }; + my @final = sort by_address (@address_order); + foreach (@final) { + printf "%08x %s %s\n", $_->[0], $_->[1], $_->[2]; + } +} + +exit 0; # That was easy :) + +sub read_object +{ + my ($filename) = @_; + my ($offset, $type, $symbol, $external); + my $basename = basename($filename, ".o"); + if (defined($module{$basename})) { + die("Module $basename appears more than once\n"); + } + my %symbol; + $module{$basename} = \%symbol; + if (! 
open(NMDATA, "$nm $filename|")) { + die("$nm $filename failed $!\n"); + } + while () { + # Ignore entries with no offset (unresolved external references) + next if (substr($_, 0, 1) eq " "); + chop; + ($offset, $type, $symbol) = split(' ', $_, 3); + $offset = hex($offset); + # Module symbols are externally visible if they are type 'D' + # (global variable, initialised), 'R' (global data, read only), + # 'T' (global label/procedure), or there is a '? __ksymtab_' + # entry for the symbol. + $external = ""; + if ($type eq 'D' || $type eq 'R' || $type eq 'T') { + $external = $symbol; + } + elsif ($type eq '?' && substr($symbol, 0, 10) eq "__ksymtab_") { + $external = substr($symbol, 10); + } + if ($external ne "") { + if (!defined($unique{$external})) { + $unique{$external} = $basename; + } + elsif ($unique{$external} ne $basename) { + # Not unique after all + $unique{$external} .= " $basename"; + } + } + $symbol{$symbol} = [ $offset, $type ]; + } + close(NMDATA); +} + +sub resolve_loaded_as_to_basename +{ + my ($loaded_as, $symbol, $basename, $adjustment, $type, + $section, $section_name); + foreach $loaded_as (keys(%loaded_as)) { + foreach $symbol (keys(%{$ksym{$loaded_as}})) { + if (defined($unique{$symbol})) { + # Symbol from ksyms uniquely identifies a module. + # Calculate the adjustment and log the real module + # name. + $basename = $unique{$symbol}; + $adjustment = $ksym{$loaded_as}->{$symbol} - + $module{$basename}->{$symbol}->[0]; + $type = $module{$basename}->{$symbol}->[1]; + # Silently ignore type 'C'. It is .bss data and the first + # field is a size, not an offset. 
+ next if ($type eq 'C'); + if (defined($convert_type_to_section{$type})) { + ($section, $section_name) = @{$convert_type_to_section{$type}}; + } + else { + die("Unexpected type $type, symbol $symbol, module $basename\n"); + } + if (!defined($loaded_as{$loaded_as})) { + $loaded_as{$loaded_as} = [ $basename, undef, undef, undef ]; + } + my $ref_resolved = $loaded_as{$loaded_as}; + if (!defined($ref_resolved->[$section])) { + $ref_resolved->[$section] = $adjustment; + printf STDERR "%s %s resolved to %s, adjustment %x\n", + $loaded_as, $section_name, $basename, $adjustment; + printf STDERR " Unique symbol %s was used, type %s\n", $symbol, $type; + } + elsif ($adjustment != $ref_resolved->[$section]) { + if ($errors++ == 0) { + printf STDERR "Mismatch detected between ksyms and %s (%s)\n", + $loaded_as, $basename; + } + printf STDERR "Symbol %s, type %s, section %s, adjustment 1 %x, adjustment 2 %x\n", + $symbol, $type, $section_name, $ref_resolved->[$section], $adjustment; + } + } + } + die("Unable to resolve $loaded_as back to the real module\n") if (!defined($adjustment)); + die("Please feed me a matching set of modules and ksyms\n") if ($errors); + } +} + +sub add_loaded_modules_to_map +{ + my ($loaded_as, $basename, $symbol, $offset, $type, $adjustment, $section); + foreach $loaded_as (keys(%loaded_as)) { + $basename = $loaded_as{$loaded_as}->[0]; + foreach $symbol (keys(%{$module{$basename}})) { + ($offset, $type) = @{$module{$basename}->{$symbol}}; + if (defined($convert_type_to_section{$type})) { + $section = $convert_type_to_section{$type}->[0]; + $adjustment = $loaded_as{$loaded_as}->[$section]; + if (defined($adjustment)) { + $map{$symbol} = [ $offset+$adjustment, $type ]; + } + } + } + } +} + +sub read_ksyms +{ + my ($filename, $line) = @_; + my ($offset, $symbol, $loaded_as); + do { + chop($line); + ($offset, $symbol, $loaded_as) = split(' ', $line, 3); + $offset = hex($offset); + if (defined($loaded_as)) { + $loaded_as =~ s:\[::; + $loaded_as =~ s:\]::; 
+ $loaded_as{$loaded_as} = undef; # fill it in later + } + else { + $loaded_as = ""; + } + if (!defined($ksym{$loaded_as})) { + $ksym{$loaded_as} = {}; + } + $ksym{$loaded_as}->{$symbol} = $offset; + } while ($line = ); +} + +sub read_system_map +{ + my ($filename, $line) = @_; + my ($offset, $type, $symbol); + do { + chop($line); + ($offset, $type, $symbol) = split(' ', $line, 3); + $offset = hex($offset); + $map{$symbol} = [ $offset, $type ]; + } while ($line = ); +} Index: linux/scripts/sort-profiling-call.pl diff -u /dev/null linux/scripts/sort-profiling-call.pl:1.1.3.1 --- /dev/null Sat Apr 24 02:50:45 1999 +++ linux/scripts/sort-profiling-call.pl Sun Jan 24 20:13:56 1999 @@ -0,0 +1,6 @@ +#!/usr/bin/perl + +@lines = ; +@lines = sort { ($a =~ /(\d+)/)[0] <=> + ($b =~ /(\d+)/)[0] } @lines; +print @lines; Index: linux/scripts/sort-profiling-cpu.pl diff -u /dev/null linux/scripts/sort-profiling-cpu.pl:1.1.3.1 --- /dev/null Sat Apr 24 02:50:45 1999 +++ linux/scripts/sort-profiling-cpu.pl Sun Jan 24 20:13:56 1999 @@ -0,0 +1,6 @@ +#!/usr/bin/perl + +@lines = ; +@lines = sort { ($a =~ /\d+\s+[^\s]+\s+(\d+\.?\d*)/)[0] <=> + ($b =~ /\d+\s+[^\s]+\s+(\d+\.?\d*)/)[0] } @lines; +print @lines; Index: linux/scripts/xkgdb diff -u /dev/null linux/scripts/xkgdb:1.1.3.1 --- /dev/null Sat Apr 24 02:50:46 1999 +++ linux/scripts/xkgdb Sun Jan 24 20:13:56 1999 @@ -0,0 +1,161 @@ +#!/bin/sh +# +# kgdb 1.6 1995/06/07 06:05:46 (David Hinds) +# +# A script for starting up gdb for kernel debugging +# +# This will load the kdebug.o module if necessary, create a device +# file for the kdebug pseudo-device, and fire up gdb, loading the +# kernel symbol table and module symbols. +# + +mpath=/lib/modules/`uname -r` +vmlinux=/usr/src/linux/vmlinux + +while getopts "m:v:t:p:" ch ; do + case $ch in + m) mpath=$OPTARG ;; + v) vmlinux=$OPTARG ;; + t) trapfile=$OPTARG ;; + p) pid=$OPTARG ;; + \?) 
+ echo "usage: $0 [-m modpath] [-v vmlinux] [-t trapfile] [-p pid]" >&2 + exit 1 + ;; + esac +done + +o_path=$mpath/misc/xkdebug.o +if [ ! -f $o_path ] ; then + echo "$o_path does not exist!" + exit 1 +fi + +if [ ! -f $vmlinux ] ; then + echo "kernel image \"$vmlinux\" does not exist!" + exit 1 +fi + +strings $vmlinux | grep -q "`uname -v`" +if [ $? -ne 0 ] ; then + echo "Warning: $vmlinux version does not match current kernel!" +fi + +insmod $o_path + +rm -f /dev/xkdebug +major=`grep ' xkdebug$' /proc/devices | cut -d' ' -f1` +mknod /dev/xkdebug c $major 0 + +cat << EOF > /tmp/kgdb.$$ +echo Reading symbols from $vmlinux... +file $vmlinux +echo done\n +echo Connecting to running kernel...\n +target remote /dev/xkdebug +set output-radix 16 +define set-task + set \$task=*(struct task_struct **)get_task(\$pid) + set \$tss=\$task->tss + set \$eax=\$tss.eax, \$ebx=\$tss.ebx, \$ecx=\$tss.ecx + set \$edx=\$tss.edx, \$esi=\$tss.esi, \$edi=\$tss.edi + set \$eip=\$tss.eip, \$ebp=\$tss.ebp, \$esp=\$tss.esp + set \$ps=\$tss.eflags +end +EOF + +search=`/bin/ls -d $mpath/*` +search=". $search" + +expand() { + while read a b c ; do + for d in $search ; do + if [ -r $d/$b ] ; then + echo "echo loading $d/$b...\n" + echo $a $d/$b $c + break + fi + done + done +} + +# The problem described below shows itself when you gdb filename, the +# file was compiled with -g and is an object file (*.o) rather than a +# linked module. -g with linked modules such as vmlinux is fine, *.o +# files without -g are fine, just the combination of the two causes +# problems. The symptoms are that all functions map to the same place +# in the file. A typical session looks like this :- +# +# gdb /lib/modules/2.1.76/misc/xkdebug.o +# GDB is free software and you are welcome to distribute copies of it +# under certain conditions; type "show copying" to see the conditions. +# There is absolutely no warranty for GDB; type "show warranty" for details. 
+# GDB 4.16 (i586-unknown-linux), Copyright 1996 Free Software Foundation, Inc... +# (gdb) info line write_registers +# Line 170 of "xkdebug.c" starts at address 0x28 +# and ends at 0x2a . +# (gdb) info line read_registers +# Line 170 of "xkdebug.c" starts at address 0x28 +# and ends at 0x2a . +# +# gdb thinks that both functions are at the same place, not good if you +# want to set breakpoints. If there are any gdb experts out there who +# can fix this, Keith Owens would like to know how. + +cat <<\EOF + +gdb 4.16 gets confused by *.o files that were compiled with debugging +information (-g). This makes it too risky to run xkdebug on loaded +modules so, until the gdb problem is fixed, xkgdb does not load symbols +for modules. If any debugging expert can solve this problem, read the +xkgdb script for more details. + +EOF + +# The original code that loaded symbol tables for modules. +#ksyms -m | sed -n -e 's/\(.*\)000 .*\[\(.*\)\]/\1048 \2/p' | \ +# awk '{ printf("add-symbol-file %s.o\t0x%s\n", $2, $1) }' \ +# | expand >> /tmp/kgdb.$$ + +if [ -n "$trapfile" ] ; then + if [ -r $trapfile ] ; then + echo "echo loading trap info...\n" >> /tmp/kgdb.$$ + else + echo "trap file \"$trapfile\" does not exist!" 
+ exit 1 + fi + # Magic number to signal we want to set up a separate stack + echo "set $eip=0xe5e5e5e5" >> /tmp/kgdb.$$ + cat $trapfile | awk ' + /EIP:/ { match($0, "EIP:"); print substr($0, RSTART) } + /EFLAGS:/,/Code:/ { print substr($0, RSTART) } ' \ + | awk ' + /EIP: / \ + { print "set $eip=0x" substr($2,6) } + /EFLAGS: / \ + { print "set $ps=0x" $2 } + /eax: / \ + { print "set $eax=0x" $2 ", $ebx=0x" $4 ", $ecx=0x" $6 ", $edx=0x" $8 } + /esi: / \ + { print "set $esi=0x" $2 ", $edi=0x" $4 ", $ebp=0x" $6 ", $esp=0x" $8 } + /Stack: / \ + { for (i=2;i<=NF;i++) print "set ((long *)$esp)[" sp++ "]=0x" $i } + /^ / \ + { for (i=1;i<=NF;i++) print "set ((long *)$esp)[" sp++ "]=0x" $i } + /Call Trace: / \ + { exit } + ' >> /tmp/kgdb.$$ +fi + +if [ x$pid != x ] ; then + echo 'set $pid='$pid >> /tmp/kgdb.$$ + echo 'set-task' >> /tmp/kgdb.$$ +fi + +echo "where" >> /tmp/kgdb.$$ + +#cat /tmp/kgdb.$$ +gdb -x /tmp/kgdb.$$ + +rm -f /tmp/kgdb.* /dev/xkdebug +rmmod xkdebug Index: linux/scripts/memleak/FAQ diff -u /dev/null linux/scripts/memleak/FAQ:1.1.3.1 --- /dev/null Sat Apr 24 02:50:47 1999 +++ linux/scripts/memleak/FAQ Sun Jan 24 20:14:00 1999 @@ -0,0 +1,83 @@ + + + how to find memory leaks + +first, since the kernel is written in C, memory leaks are hard +to find. Most if not all data given by this patch/tools are +heuristic, use common sense and testing before calling something a +'memory leak'. + +terms: + + - 'allocation point': a certain point in the kernel source where + a memory object is allocated via some allocator, eg. kmalloc() + or get_free_pages(). + + - 'allocation count': the number of not-yet-freed allocations + done at a certain allocation point. + + - 'memory leak': a 'forgotten' buffer, too many such buffers + might cause system slowdown/hangups. + + +install the patch and reboot into the new kernel. 
You should +see the /proc/memleak file, with contents like these: + +pc7537:~> head /proc/memleak +<00000000> jiffies.c:0 (8179) +<00001000> vmalloc.c:124 (0) +<00002000> filemap.c:274 (7946) +<00003000> vmalloc.c:124 (0) +<00004000> fork.c:237 (0) +<00005000> fork.c:186 (0) +<00005800> fork.c:186 (0) +<00006000> fork.c:237 (0) +<00007000> vmalloc.c:124 (0) +<00008000> buffer.c:1349 (8156) + +The first entry is a 'special' entry. + +Units are seconds since bootup at the moment the object was allocated. The 'jiffies' line +shows the current 'elapsed seconds' value. + +e.g.: + +<00002000> memory.c:930 (4838) <---------------------- was allocated 4838 + seconds after boot. + ^---------------------- was allocated here + ^------- object address + + +The special entry 'jiffies.c' shows the elapsed time since bootup: + +<00000000> jiffies.c:0 (5269) <---------- 5269 seconds elapsed since this + system booted. + + + +The second thing you might want to try is the 'dosum' script: + +774 buffer.c:1349 +9 console.c:322 +9 console.c:325 + +The first number is the 'allocation count', the number of memory objects +allocated at a certain FILE:LINE. If some allocation point shows a constantly +increasing allocation count, it's probably a memory leak. + +NOTE: the VM subsystems usually have highly fluctuating allocation counts; +think twice before calling them a memory leak. + +Piping /proc/memleak through the 'cutvm' script filters these allocations +out. + +There are other scripts too; read the comments in them to find out what +they do ... and you might want to write custom scripts yourself, if you +have a specific thing to debug. The memcheck.sh script stores the current +allocation map into an RCS tree. RCS stores 'delta' maps very +efficiently. Use "rcsdiff -r{} -r{}" to see the delta quickly. 
+ +that's all for now, + + Ingo + Index: linux/scripts/memleak/Makefile diff -u /dev/null linux/scripts/memleak/Makefile:1.1.3.1 --- /dev/null Sat Apr 24 02:50:47 1999 +++ linux/scripts/memleak/Makefile Sun Jan 24 20:14:00 1999 @@ -0,0 +1,9 @@ +all: findaddr + +TMPCFLAGS=$(CFLAGS:%-pg=%) +# True userspace program, remove the __KERNEL__ flag +findaddr: findaddr.c + $(CC) $(TMPCFLAGS:%-g=%) -U__KERNEL__ -o findaddr findaddr.c + +clean: + rm -f findaddr Index: linux/scripts/memleak/doalloc diff -u /dev/null linux/scripts/memleak/doalloc:1.1.3.1 --- /dev/null Sat Apr 24 02:50:47 1999 +++ linux/scripts/memleak/doalloc Sun Jan 24 20:14:00 1999 @@ -0,0 +1,11 @@ +#!/bin/bash + +# +# this script post-processes files generated by 'dorun'. +# it lists allocation points and multiple allocation counts in one line; +# each line shows how a particular allocation point 'evolves' in time +# + + +for N in `cut -f2 9*| sort| uniq`; do echo $N: `grep $N 9*| cut -d: -f4| cut -f1`; done + Index: linux/scripts/memleak/docutvm diff -u /dev/null linux/scripts/memleak/docutvm:1.1.3.1 --- /dev/null Sat Apr 24 02:50:47 1999 +++ linux/scripts/memleak/docutvm Sun Jan 24 20:14:00 1999 @@ -0,0 +1,9 @@ +#!/bin/bash + +# +# this script eliminates 'VM/buffer cache' allocations; these +# are harder to understand and their allocation count fluctuates wildly. 
+# + +grep -v slab.c | grep -v memory.c | grep -v swap_state.c | grep -v filemap.c | grep -v file_table.c | grep -v buffer.c | grep -v dcache.c | grep -v pgtable | grep -v mmap.c | grep -v fork.c | grep -v exec.c + Index: linux/scripts/memleak/dodelta diff -u /dev/null linux/scripts/memleak/dodelta:1.1.3.1 --- /dev/null Sat Apr 24 02:50:47 1999 +++ linux/scripts/memleak/dodelta Sun Jan 24 20:14:00 1999 @@ -0,0 +1,9 @@ +#!/bin/bash + +# +# same as 'doalloc', but it lists delta allocations, not the full number +# of allocations +# + +for N in `cut -f2 9*| sort| uniq`; do ( P=0;F=1;for M in `grep $N 9*| cut -d: -f4| cut -f1`; do if [ "$F" = 1 ]; then F=0; FIRST=$M; fi; echo $[$M-$P]; P=$M; done; echo "DELTA: $[$M-$FIRST]";) | xargs echo $N: ; done + Index: linux/scripts/memleak/dofind diff -u /dev/null linux/scripts/memleak/dofind:1.1.3.1 --- /dev/null Sat Apr 24 02:50:47 1999 +++ linux/scripts/memleak/dofind Sun Jan 24 20:14:00 1999 @@ -0,0 +1,14 @@ +#!/bin/bash + +# +# this script lists whether all 'currently allocated' memory
+# objects are actually referenced in the kernel ... this
+# isn't a 100% sure method, but usually a 'used' object has its address
+# listed somewhere, while a 'leaked' object doesn't have any
+# references anymore. +# + +cp /proc/memleak /tmp/leak3 + +for N in `cat /tmp/leak3 | cut -c2-9`; do findaddr 0x$N; done + Index: linux/scripts/memleak/dorun diff -u /dev/null linux/scripts/memleak/dorun:1.1.3.1 --- /dev/null Sat Apr 24 02:50:47 1999 +++ linux/scripts/memleak/dorun Sun Jan 24 20:14:00 1999 @@ -0,0 +1,14 @@ +#!/bin/bash + +# +# this script puts an 'allocation summary' into the current +# directory every 10 seconds. +# +# you can analyze these files via 'doalloc' and 'dodelta', +# to find suspicious allocation points. 
+# + +while true; do + FILE=`date +'%y-%m-%d__%T'`; sync; sleep 10; echo $FILE; dosum > $FILE; +done + Index: linux/scripts/memleak/dosum diff -u /dev/null linux/scripts/memleak/dosum:1.1.3.1 --- /dev/null Sat Apr 24 02:50:47 1999 +++ linux/scripts/memleak/dosum Sun Jan 24 20:14:00 1999 @@ -0,0 +1,8 @@ +#!/bin/bash + +# +# generates 'current allocation summary' +# + +(cat /proc/memleak | cut -d'(' -f1 > /tmp/leak; cat /tmp/leak )| cut -c12- | sort | gawk -- 'BEGIN{Y=0;}//{if ($0 == X) {Y=Y+1;} else {if (Y) printf ("%d\t %s\n", Y, X); Y=1;} X=$0}END{ printf ("%d\t %s\n", Y, $0);}' + Index: linux/scripts/memleak/dotimestamp diff -u /dev/null linux/scripts/memleak/dotimestamp:1.1.3.1 --- /dev/null Sat Apr 24 02:50:47 1999 +++ linux/scripts/memleak/dotimestamp Sun Jan 24 20:14:00 1999 @@ -0,0 +1,10 @@ +#!/bin/bash + +# +# this script generates a timestamp-sorted list of allocations. +# +# 'old' (low timestamp value) allocations have a higher chance +# that they are actually leaked away objects. +# + +cp /proc/memleak /tmp/leak2; sort -n -t'(' +1 /tmp/leak2 Index: linux/scripts/memleak/findaddr.c diff -u /dev/null linux/scripts/memleak/findaddr.c:1.1.3.1 --- /dev/null Sat Apr 24 02:50:47 1999 +++ linux/scripts/memleak/findaddr.c Sun Jan 24 20:14:00 1999 @@ -0,0 +1,68 @@ + +/* + * find a pointer in /proc/kcore (all system memory) + * + * a leaked object probably hasnt got any references in + * memory. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define N 4096 + +char buffer [N]; + +char * fool_libc = "0x0deadbee"; + +int main(int argc, char * * argv) +{ + int file = open("/proc/kcore",O_RDONLY); + int n,i,hits=0; + unsigned int addr, pos=0, fool_addr; + + if (argc != 2) { + fprintf(stderr,"usage: findaddr 0x\n"); + exit(-1); + } + if (file==-1) { + perror("couldn't open /proc/kcore\n"); + exit(-1); + } + sscanf(argv[1],"0x%08x",&addr); + + addr--; + + sscanf(fool_libc,"0x%08x",&fool_addr); + + while ((n = read(file,buffer,N)) > 0) { + for (i=0; i<=n-sizeof(int); i++) { + + if ((*((int *)&(buffer[i])))-1 == addr) { + if (++hits) { + printf("found 0x%08x at %08x\n", addr+1, pos+i*sizeof(int)); + goto out; + } + } + + } + pos += n; + } + if (!n) + printf("0x%08x not found!\n", addr+1); +out: + return (0); +} + + Index: linux/scripts/memleak/memcheck.sh diff -u /dev/null linux/scripts/memleak/memcheck.sh:1.1.3.1 --- /dev/null Sat Apr 24 02:50:47 1999 +++ linux/scripts/memleak/memcheck.sh Sun Jan 24 20:14:00 1999 @@ -0,0 +1,7 @@ +#!/bin/bash +TEMPFILE=`tempfile` +LOGFILE=/var/adm/memleak + cat /proc/memleak | cut -c12- | sort | gawk -- 'BEGIN{Y=0;}//{if ($0 == X) {Y=Y+1;} else {if (Y) printf ("%d\t %s\n", Y, X); Y=1;} X=$0}END{printf ("%d\t %s\n", Y, $0);}' > $TEMPFILE +co -l $LOGFILE &>/dev/null +mv $TEMPFILE $LOGFILE +echo "." | ci $LOGFILE &>/dev/null